# code for loading the format for the notebook
import os
# path : store the current path to convert back to it later
path = os.getcwd()
os.chdir(os.path.join('..', '..', 'notebook_format'))
from formats import load_style
load_style(plot_style=False)
os.chdir(path)
# magic to print version
%load_ext watermark
%watermark -a 'Ethen' -d -t -v
Some of the materials are a condensed reimplementation from the resource: Python3 Cookbook Chapter 1. Data Structures and Algorithms, which originally was freely available online.
Example1: Unpacking a tuple.
# note that in Python,
# it's the "," that creates the tuple,
# so we technically do not need the ()
# around the 4, 5. But if we need to create
# a single element tuple, then we do need to ()
# e.g. (4,) would be a single element tuple
p = (4, 5)
x, y = p
print(x)
print(y)
This works for all types of sequences (iterables), including tuples, list, strings, generators, files.
Example2: If you want to discard some of the elements, simply use _
to represent it (You can use anything you want, this is just convention).
data = ['ACME', 50, 91.1, (2012, 12, 21)]
_, shares, price, _ = data
print(shares)
print(price)
Example1: Data that has name, e-mail and arbitrary number of telephone numbers.
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name, email, *phone_numbers = record
phone_numbers
The star expression will always unpack a list (including none).
Example2: Performing different actions when looping over different "tagged" tuples. "Tagged" simply means they have some known pattern or structure. e.g. The first element is a tag indicating what other information does this tag contain.
records = [
('foo', 1, 2),
('bar', 'hello'),
('foo', 3, 4)
]
def do_foo(x, y):
print('foo', x, y)
def do_bar(s):
print('bar', s)
for tag, *args in records:
if tag == 'foo':
do_foo(*args)
elif tag == 'bar':
do_bar(*args)
Example3: String manipulation and throwing away variables.
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *_, homedir, sh = line.split(':')
print(uname)
print(homedir)
print(sh)
Example1: A fix-sized queue that removes the oldest item when a new item is added and the queue is full.
from collections import deque
# specify the maxlen argument
q = deque(maxlen = 3)
q.append(1)
q.append(2)
q.append(3)
print(q)
q.append(4)
print(q)
Example2: A unbounded queue. You can pop and add item from both end with O(1).
q = deque()
q.append(1)
q.append(2)
q.append(3)
print(q)
q.appendleft(4)
print(q)
# removes the right-most element
print(q.pop())
print(q)
# removes the left-most element
print(q.popleft())
print(q)
Example1: nlargest()
and nsmallest()
.
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))
Example2: nlargest()
and nsmallest()
with more complex data structure.
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key = lambda s: s['price'])
cheap
When to use what:
nlargest()
and nsmallest()
if you are trying to find a relatively small number of items. min()
and max()
if you are simply trying to find the single smallest or largest item (N=1). sorted(items)[:N]
or sorted(items)[-N:]
if N is about the same size as the input.We can think of priority queue as a modified queue: instead of retrieving the next element by insertion time, it retrieves the highest-priority element. The priority of individual elements is decided by the ordering applied to their keys.
Priority queues are commonly used for dealing with scheduling problems. High-priority tasks on the system should take precedence over lower-priority tasks. By organizing pending tasks in a priority queue that uses the task urgency as the key, the task scheduler can allow the highest-priority tasks to run first.
Let’s take a look at how we can implement priority queues in Python using built-in data structures or data structures that ship with Python’s standard library.
from queue import PriorityQueue
q = PriorityQueue()
q.put((2, 'code'))
q.put((1, 'eat'))
q.put((3, 'sleep'))
while not q.empty():
# note that apart from returning
# the item from the queue, it will
# also remove it from the queue
next_item = q.get()
print(next_item)
As we can infer from the output, prioriy queue stores the elements by its priority, in this case the first element in the tuple. After from sorting primitive types such as integers, we can also sort objects that we've defined. To perform sorting on custom objects we need to implement the dunder methods for all 6 comparisons.
Operator | Method |
---|---|
== | __eq__ |
!= | __ne__ |
< | __le__ |
<= | __le__ |
> | __gt__ |
>= | __ge__ |
class Skill:
def __init__(self, priority, description):
self.priority = priority
self.description = description
def __eq__(self, other):
return self.priority == other.priority
def __ne__(self, other):
return self.priority != other.priority
def __lt__(self, other):
return self.priority < other.priority
def __le__(self, other):
return self.priority <= other.priority
def __gt__(self, other):
return self.priority > other.priority
def __ge__(self, other):
return self.priority >= other.priority
def __repr__(self):
return '{}: {}'.format(self.description, self.priority)
q = PriorityQueue()
q.put(Skill(5, 'R'))
q.put(Skill(10, 'Python'))
q.put(Skill(1, 'Java'))
while not q.empty():
next_item = q.get()
print(next_item)
from collections import OrderedDict
d = dict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
for key in d:
print(key, d[key], end = '; ')
print()
od = OrderedDict()
od['foo'] = 1
od['bar'] = 2
od['spam'] = 3
od['grok'] = 4
# the order remains the same compared to the dict
for key in od:
print(key, od[key], end = '; ')
Be aware that the size of an OrderedDict is more than twice as large as a normal dictionary!!
# one useful case may be keeping the order when converting it into json
import json
json.dumps(od)
Performing calculations (e.g., min, max, sorting, etc.) on a dictionary by convert the it into tuples of ( value, key ).
# company and stocks
prices = {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55,
'HPQ': 37.20, 'FB': 10.75}
min_price = min(zip(prices.values(), prices.keys()))
print(min_price)
We can perform common set operations with dictionary .keys()
and .items()
without having to convert them into a set as they are unique.
a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}
# common keys
print(a.keys() & b.keys())
# keys in a that are not in b
print(a.keys() - b.keys())
# (key,value) pairs in common
print(a.items() & b.items())
# make a new dictionary with certain keys removed
c = {key: a[key] for key in a.keys() - {'z'}}
print(c)
# simply calling set() removes duplicated but does not retain the original order
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(set(a))
def dedupe(items):
seen = set()
for item in items:
if item not in seen:
seen.add(item)
yield item
list(dedupe(a))
items = [0, 1, 2, 3, 4, 5, 6]
important = slice(2, 4, 1)
# instead of hardcoding the indexing slicing
# we name the slice so we will know what it means
# when we look at it later
print(items[2:4])
print(items[important])
# you can also look at various info of the slice
print(important.start)
print(important.stop)
print(important.step)
The most_common
functionality from Counter.
from collections import Counter
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]
word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)
Counter is simply a dictionary and we can manually increment the count.
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
word_counts[word] += 1
word_counts['eyes']
# we can also perform math operations between them
a = Counter(words)
b = Counter(morewords)
print(a + b)
By using the key
argument with the sorted
function we can accomplish a bit more complex operations when it comes to sorting. Note that key only accepts functions as its parameters, thus in the following we use a lambda to create an anonymous function and sort by the second element of the tuples.
students = [
('john', 'A', 15),
('jane', 'B', 12),
('dave', 'B', 10),
]
# sort by age , which is the last element
# in the three element tuple list
sorted(students, key = lambda x: x[2])
Instead of doing that we can use itemgetter
for convenience.
from operator import itemgetter
sorted(students, key = itemgetter(2))
The same notion also applies to sorting by dictionary key.
from operator import itemgetter
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
rows_by_fname = sorted(rows, key = itemgetter('fname'))
rows_by_fname
The method is faster then key = lambda r: r['fname']
. And we can assign multiple values inside the itemgetter().
There's also attrgetter
for performing sorting according to class object's attribute.
class Student:
def __init__(self, name, grade, age):
self.age = age
self.name = name
self.grade = grade
def __repr__(self):
return repr((self.name, self.grade, self.age))
student_objects = [
Student('john', 'A', 15),
Student('jane', 'B', 12),
Student('dave', 'B', 10)
]
# we can sort using the lambda anonymous function way
sorted(student_objects, key = lambda student: student.age)
from operator import attrgetter
# or sort using attrgetter, notice that we can pass in
# multiple arguments. So here we are sorting by grade then age
sorted(student_objects, key = attrgetter('grade', 'age'))
Create a tuple with names for clearity (compared with tuples) and also has the immutable feature.
from collections import namedtuple
# create the name tuple by assigning the field name
Color = namedtuple('Color', ['red', 'green', 'blue'])
color = Color(55, 55, 55)
# access the element using .field name
print(color.red)
# be aware that we can use index to access the element
# in the namedtuple, but this defeats the whole purpose
# of using namedtuple versus plain tuple, i.e. color.red
# is arguably more readable than color[0]
print(color[0])
namedtuple can be used as a replacement for a dictionary, which requires more space to store. However, be aware that a it's immutable.
# because of its immutable feature
# color.red = 75 # this will return an error
# use ._replace() if you really really really
# wish to change the value after creation
color = color._replace(red = 75)
color
Whenever you need a dictionary, and each value of the dictionary has to start with the default value, use defaultdict. A defaultdict will never raise a KeyError. Any key that does not exist gets the value returned by the default factory.
# example 1: Joe does not exist in the dictionary, return default value
from collections import defaultdict
ice_cream = defaultdict(lambda: 'Vanilla')
ice_cream['Sarah'] = 'Chunky Monkey'
ice_cream['Abdul'] = 'Butter Pecan'
print(ice_cream['Sarah'])
print(ice_cream['Joe'])
# example 2: Grouping with dictionaries
from collections import defaultdict
city_list = [('TX','Austin'), ('TX','Houston'), ('NY','Albany'), ('NY', 'Syracuse'),
('NY', 'Buffalo'), ('NY', 'Rochester'), ('TX', 'Dallas'), ('CA','Sacramento'),
('CA', 'Palo Alto'), ('GA', 'Atlanta')]
cities_by_state = defaultdict(list)
for state, city in city_list:
cities_by_state[state].append(city)
for state, cities in cities_by_state.items():
print(state, ", ".join(cities))
Example1: Replacing the values that don’t meet the criteria with a new value.
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
clip_neg = [m if m > 0 else 0 for m in mylist]
clip_neg
Example2: Use filter
when the filtering criteria can't be easily expressed.
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
try:
x = int(val)
return True
except ValueError:
return False
# filter returns an iterator, you'll have to manually
# convert it to a list
ivals = list(filter(is_int, values))
print(ivals)
Example3: itertools.compress()
for filtering a sequence with an accompanying boolean sequence. Suppose you want to make a list of all addresses where the corresponding count value was greater than 5.
from itertools import compress
addresses = [
'5412 N CLARK',
'5148 N CLARK',
'5800 E 58TH',
'2122 N CLARK'
'5645 N RAVENSWOOD',
'1060 W ADDISON',
'4801 N BROADWAY',
'1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
more5 = [n > 5 for n in counts]
# it also returns an iterator
print(list(compress(addresses, more5)))
prices = {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55,
'HPQ': 37.20, 'FB': 10.75}
# Make a dictionary of all prices over 200
p1 = {key: value for key, value in prices.items() if value > 200}
# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
Example1: Notice that we do not need to write sum((x * x for x in nums))
to first create the generator.
nums = [1, 2, 3, 4, 5]
s = sum(x * x for x in nums)
s
Example2: Determine if any .py files exist in a directory.
"""
import os
files = os.listdir('dirname')
if any(name.endswith('.py') for name in files):
print('There be python!')
else:
print('Sorry, no python.')
"""
print("uncomment to run, but there's no directory named 'dirname'")
For else let's us remove extraneous flag variables. Examples with and without for ... else ...
def contains_even_number1(l):
"""Prints whether or not the list l contains an even number."""
has_even_number = False
for element in l:
if element % 2 == 0:
has_even_number = True
break
if has_even_number:
print("list contains an even number")
else:
print("list does not contain an even number")
def contains_even_number2(l):
"""Prints whether or not the list l contains an even number."""
for element in l:
if element % 2 == 0:
print("list contains an even number")
break
# we can think of the else statement as "if no break"
else:
print("list does not contain an even number")
list1 = [3, 5, 8]
contains_even_number1(list1)
contains_even_number2(list1)
Note that Python also has a try, except, else expression. In this scenario, else will evaluate only if there is no exception from the try block, allowing us to simplify the more complicated code below:
# we have a sentinel flag indicating whether our try succeeded without any exception.
no_error = False
try:
try_this(whatever)
no_error = True
except SomeException as the_exception:
handle(the_exception)
if no_error:
return something
We can re-write the flow above using else:
try:
try_this(whatever)
except SomeException as the_exception:
handle(the_exception)
else: # only execute when there is no exception from the try block
return something
The reason why this type of flow control exists is because A try block allows us to handle an "expected" error. The except block should only catch exceptions we are prepared to handle. If we handle an unexpected error, our code may do the wrong thing and hide bugs.