Notes taken from Northwestern University Data Science Bootcamp and online materials
- Python notes
- integer
- float
- boolean
- string
- list
- tuple
- set
- dictionary
Everything about formatting: documentation
| | creation | ordered | mixed data types | elements accessed by | mutable | repeatable |
|---|---|---|---|---|---|---|
| list | [] | y | y | index | y | y |
| tuple | () | y | y | index | n | y |
| set | {} (set() when empty) | n | y | not subscriptable (iterate or test with in) | y | n |
| dictionary | {} | n | y | key | y | n |
//, floor division (rounds down, also for negative numbers)
%, remainder
**, exponentiation
==, equality test (note: = is assignment, == is an equality test)
!=
and
or
not
-
.capitalize(), capitalizes the first character -
.lower(), makes the entire string lowercase -
.upper(), makes the entire string uppercase -
.title(), capitalizes every word in a string -
.rstrip(' '), strip away characters from the right side -
.strip(), remove all whitespaces from both sides -
.lstrip(' '), strip away characters starting from the left -
.split(','), split -
','.join(['a','b']), opposite of split -
.isalpha(), check if all of the characters are alphabetical -
.isnumeric(), check if the string is a number -
"Hi my name is {}. My hobby is {}?".format(name, hobby) -
f"Hi my name is {name}. My hobby is {hobby}", f-string, only available in Python 3.6+ -
"{:.1%}".format(percentage), formatting string
Find substrings inside of strings:
-
.find('the'), returns index if found, or -1 if not found -
.index('the'), returns index if found, or raises ValueError if not found
-
items = [] items.append('item1') items.extend(['item2', 'item3']) items + ['item4'] items.insert(4, 'item5')
items[0] = 'item1_new' items.index('item5')
items.pop(4) items.remove('item4') del items[-1]
-
zip(names, ages) # zip multiple lists into an iterator of tuples; wrap in list() to get a list
-
items.reverse() # methods act on the variable directly items.sort() reversed(items) # functions keep the original variable unchanged sorted(items)
-
for index, name in enumerate(names): # enumerate yields (index, item) tuples
    print(f"Name {name} is at index {index}")
-
price_strings = ['24', '13', '1'] price_nums = [int(price) for price in price_strings] price_new = [int(price) for price in price_strings if price != '1']
-
penny = (60, 'yellow') penny + ('amber', ) # add a , so () is different from math operation
-
pets = set() # {} would create an empty dictionary, not a set
pets.add('bulldog')
pets.discard('bulldog')
-
set1 = set() # {} would create an empty dictionary, not a set
set2 = set()
set1.intersection(set2) # intersection
set1.union(set2) # union
set1.difference(set2) # difference: set1 - (intersection of set1 and set2)
-
roster = {} roster['Favorite Sport'] = 'Soccer' # add a new item roster.update({'Favorite Sports Team': 'S. L. Benfica', 'Favorite Sports Team Mascot': 'Eagle'}) # add multiple new items del roster['Favorite Sports Team Mascot'] print( roster.pop('Favorite Sports Team') )
-
roster.items() # returns a view object (iterable, not a list)
roster.keys()
roster.values()
if 'a' in 'abcd':
...
elif ...:
...
else:
...
while ...:
...
for i in range(1, 5):
...
try:
number = int(number_to_square)
print("Your number squared is ", number**2)
except:
print("You didn't enter an integer!")
def square(number):
return square_of_number
assert(square(3) == 9)
import pdb; pdb.set_trace() # code will run up to this line
-
file_in = open('Data/ages.csv', 'r') lines_str = file_in.read() # read file into a string lines_list = file_in.readlines() # read file into a list of strings file_in.close()
-
with open('Data/ages.csv', 'r') as file_in: lines_list = file_in.readlines()
-
import csv with open('Data/ages.csv', newline='') as file_in: csvreader = csv.reader(file_in, delimiter=',') next(csvreader, None) # skip header for row in csvreader: print(row)
-
import csv with open('names.csv', newline='') as file_in: reader = csv.DictReader(file_in) for row in reader: print(row['first_name'])
-
json stores complex data structures that take on these forms:
-
object (e.g., dictionary)
-
array
import json with open('records.json', 'r') as file_in: loaded_records = json.load(file_in)
-
-
delimiter = ',' file_out = open('../Data/TA_ages.csv', 'w') for name, age in all_records: file_out.write(name + delimiter + str(age) + '\n') file_out.close()
-
delimiter = ',' with open('../Data/TA_ages.csv', 'w') as file_out: for name, age in all_records.items(): file_out.write(name + delimiter + str(age) + '\n') file_out.close()
-
delimiter = ',' import csv with open(output_path, 'w', newline='') as file_out: csvwriter = csv.writer(file_out, delimiter=',') csvwriter.writerow(['', '', ''])
-
import csv with open('names.csv', 'w', newline='') as csvfile: fieldnames = ['first_name', 'last_name'] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'}) # writer.writerows(list_of_dictionaries)
-
import json with open('records.json', 'w') as file_out: json.dump(all_records, file_out)
-
# Python has a file containing a dictionary of encoding names and associated aliases from encodings.aliases import aliases alias_values = set(aliases.values()) for alias in alias_values: try: df = pd.read_csv('mystery.csv', encoding=alias) print(alias) except: pass
-
import chardet # use the detect method to find the encoding # 'rb' means read in the file as binary with open("mystery.csv", 'rb') as file: print(chardet.detect(file.read()))
-
random.random(), returns a number in the range [0.0, 1.0)
random.randint(a, b), returns an integer in the range [a, b]
random.choice(x), randomly returns a value from the sequence x
random.sample(x, y), randomly returns a sample of length y from the sequence x without replacement -
os.getcwd(), get current directory
os.listdir(directory_name), list of files in the directory
os.path.join(file_directory, 'file_name.txt')
print(os.path.exists(file_path)), check if the path exists -
glob.glob(directory_name + '/*.py'), return a list of paths matching a pathname pattern -
time.sleep(x), pauses for x seconds
time.time(), gets current time in seconds -
today = datetime.date.today()
print(today)
print(today.day)
print(today.month)
print(today.year)
birthday = datetime.date(1984, 2, 25)
print(birthday)
print(birthday.day)
print(birthday.month)
print(birthday.year)
raw_time = "Mon May 21 20:50:07 +0000 2018"
datetime.datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")
diff_seconds = (converted_timestamps0 - converted_timestamps1).seconds # note: .seconds excludes whole days; use .total_seconds() for the full difference
-
copy.copy(x), shallow copy of x
copy.deepcopy(x), deep copy of x -
x.sort(key=operator.itemgetter(2)), sort based off the 3rd value of each list in x -
collections.Counter, counts repeated instances from an iterable
def function_name(input_var1, input_var2 = "Anna"):
# input_var2 has a default and is optional
"""
Return all roster filenames in directory
input:
input_var - str, Directory that contains the roster files
output:
output_var - list, List of roster filenames in directory
"""
statements
return output_var1, output_var2 # a tuple of both variables is returned
output_var1, output_var2 = function_name(input_var1, input_var2) # unpacking
print(function_name.__doc__) # print the docstring
-
class Dog(): # always capitalize class names # Utilize the Python constructor to initialize the object def __init__(self, name, color): self.name = name self.color = color
-
dog = Dog('Fido', 'brown')
-
print(dog.name) print(dog.color)
-
# Define the Film class class Film(): # A required function to initialize a film object def __init__(self, name, length, release_year, language): self.name = name self.length = length self.release_year = release_year self.language = language
-
# An object belonging to the Film class star_wars = Film("Star Wars", 121, 1977, "English")
-
# Define the Expert class class Expert(): expert_count = 0 # A required function to initialize the class object def __init__(self, name): self.name = name Expert.expert_count += 1 # A method that takes another object as its argument def boast(self, obj): # Print out Expert object's name print("Hi. My name is", self.name) # Print out the name of the Film class object print("I know a lot about", obj.name) print("It is", obj.length, "minutes long") print("It was released in", obj.release_year) print("It is in", obj.language)
-
expert = Expert("Elbert") expert.boast(star_wars)