5. IterablesΒΆ
alist = list() # linear, size not fixed, not hashable
atuple = tuple() # linear, fixed size, hashable
adict = dict() # hash table, not hashable, stores (key,value) pairs
aset = set() # hash table, like dict but only stores keys
acopy = alist.copy() # shallow copy
print(len(alist)) # gets size of any iterable type
0
# examplar tuple usage
# creating a dictionary to store ngram counts
d = dict()
d[("a","cat")] = 10
d[["a","cat"]] = 11
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-47597361a541> in <module>
3 d = dict()
4 d[("a","cat")] = 10
----> 5 d[["a","cat"]] = 11
TypeError: unhashable type: 'list'
"""
List: not hashable (i.e. can't use as dictionary key)
dynamic size
allows duplicates and inconsistent element types
dynamic array implementation
"""
# list creation
alist = [] # empty list, equivalent to list()
alist = [1,2,3,4,5] # initialized list
print(alist[0])
alist[0] = 5
print(alist)
print("-"*10)
# list indexing
print(alist[0]) # get first element (at index 0)
print(alist[-2]) # get last element (at index len-1)
print(alist[3:]) # get elements starting from index 3 (inclusive)
print(alist[:3]) # get elements stopping at index 3 (exclusive)
print(alist[2:4]) # get elements within index range [2,4)
print(alist[6:]) # prints nothing because index is out of range
print(alist[::-1]) # returns a reversed list
print("-"*10)
# list modification
alist.append("new item") # insert at end
alist.insert(0, "new item") # insert at index 0
alist.extend([2,3,4]) # concatenate lists
# above line is equivalent to alist += [2,3,4]
alist.index("new item") # search by content
alist.remove("new item") # remove by content
alist.pop(0) # remove by index
print(alist)
print("-"*10)
if "new item" in alist:
print("found")
else:
print("not found")
print("-"*10)
# list traversal
for ele in alist:
print(ele)
print("-"*10)
# or traverse with index
for i, ele in enumerate(alist):
print(i, ele)
1
[5, 2, 3, 4, 5]
----------
5
4
[4, 5]
[5, 2, 3]
[3, 4]
[]
[5, 4, 3, 2, 5]
----------
[2, 3, 4, 5, 'new item', 2, 3, 4]
----------
found
----------
2
3
4
5
new item
2
3
4
----------
0 2
1 3
2 4
3 5
4 new item
5 2
6 3
7 4
"""
Tuple: hashable (i.e. can use as dictionary key)
fixed size (no insertion or deletion)
"""
# it does not make sense to create empty tuples
atuple = (1,2,3,4,5)
# or you can cast other iterables to tuple
atuple = tuple([1,2,3])
# indexing and traversal are same as list
"""
Named tuples for readibility
"""
from collections import namedtuple
Point = namedtuple('Point', 'x y')
pt1 = Point(1.0, 5.0)
pt2 = Point(2.5, 1.5)
print(pt1.x, pt1.y)
1.0 5.0
"""
Dict: not hashable
dynamic size
no duplicates allowed
hash table implementation which is fast for searching
"""
# dict creation
adict = {} # empty dict, equivalent to dict()
adict = {'a':1, 'b':2, 'c':3}
print(adict)
# get all keys in dictionary
print(adict.keys())
# get value paired with key
print(adict['a'])
key = 'e'
# NOTE: accessing keys not in the dictionary leads to exception
if key in adict:
print(adict[key])
# add or modify dictionary entries
adict['e'] = 10 # insert new key
adict['e'] = 5 # modify existing keys
print("-"*10)
# traverse keys only
for key in adict:
print(key, adict[key])
print("-"*10)
# or traverse key-value pairs together
for key, value in adict.items():
print(key, value)
print("-"*10)
# NOTE: Checking if a key exists
key = 'e'
if key in adict: # NO .keys() here please!
print(adict[key])
else:
print("Not found!")
{'a': 1, 'b': 2, 'c': 3}
dict_keys(['a', 'b', 'c'])
1
----------
a 1
b 2
c 3
e 5
----------
a 1
b 2
c 3
e 5
----------
5
"""
Special dictionaries
"""
# set is a dictionary without values
aset = set()
aset.add('a')
# deduplication short-cut using set
alist = [1,2,3,3,3,4,3]
alist = list(set(alist))
print(alist)
# default_dictionary returns a value computed from a default function
# for non-existent entries
from collections import defaultdict
adict = defaultdict(lambda: 'unknown')
adict['cat'] = 'feline'
print(adict['cat'])
print(adict['dog'])
[1, 2, 3, 4]
feline
unknown
# counter is a dictionary with default value of 0
# and provides handy iterable counting tools
from collections import Counter
# initialize and modify empty counter
counter1 = Counter()
counter1['t'] = 10
counter1['t'] += 1
counter1['e'] += 1
print(counter1)
print("-"*10)
# initialize counter from iterable
counter2 = Counter("letters to be counted")
print(counter2)
print("-"*10)
# computations using counters
print("1", counter1 + counter2)
print("2,", counter1 - counter2)
print("3", counter1 or counter2) # or for intersection, and for union
Counter({'t': 11, 'e': 1})
----------
Counter({'e': 4, 't': 4, ' ': 3, 'o': 2, 'l': 1, 'r': 1, 's': 1, 'b': 1, 'c': 1, 'u': 1, 'n': 1, 'd': 1})
----------
1 Counter({'t': 15, 'e': 5, ' ': 3, 'o': 2, 'l': 1, 'r': 1, 's': 1, 'b': 1, 'c': 1, 'u': 1, 'n': 1, 'd': 1})
2, Counter({'t': 7})
3 Counter({'t': 11, 'e': 1})
# sorting
a = [4,6,1,7,0,5,1,8,9]
a = sorted(a)
print(a)
a = sorted(a, reverse=True)
print(a)
[0, 1, 1, 4, 5, 6, 7, 8, 9]
[9, 8, 7, 6, 5, 4, 1, 1, 0]
# sorting
a = [("cat",1), ("dog", 3), ("bird", 2)]
a = sorted(a)
print(a)
a = sorted(a, key=lambda x:x[1])
print(a)
[('bird', 2), ('cat', 1), ('dog', 3)]
[('cat', 1), ('bird', 2), ('dog', 3)]
# useful in dictionary sorting
adict = {'cat':3, 'bird':1}
print(sorted(adict.items(), key=lambda x:x[1]))
[('bird', 1), ('cat', 3)]
# Syntax sugar: one-line control flow + list operation
sent = ["i am good", "a beautiful day", "HELLO FRIEND"]
"""
for i in range(len(sent)):
sent[i] = sent[i].lower().split(" ")
"""
sent1 = [s.lower().split(" ") for s in sent]
print(sent1)
sent2 = [s.lower().split(" ") for s in sent if len(s) > 10]
print(sent2)
# Use this for deep copy!
# copy = [obj.copy() for obj in original]
[['i', 'am', 'good'], ['a', 'beautiful', 'day'], ['hello', 'friend']]
[['a', 'beautiful', 'day'], ['hello', 'friend']]
# Syntax sugar: * operator for repeating iterable elements
print("-"*10)
print([1]*10)
# Note: This only repeating by value
# So you cannot apply the trick on reference types
# To create a double list
# DONT
doublelist = [[]]*10
doublelist[0].append(1)
print(doublelist)
# DO
doublelist = [[] for _ in range(10)]
doublelist[0].append(1)
print(doublelist)
----------
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]
[[1], [], [], [], [], [], [], [], [], []]