15  Module 2 - Day 4

Topics

For today’s practice, use the notebook module2-day4.ipynb created in your environment. Shut down the kernels of all previous notebooks (if they are still running) by right-clicking on each notebook in the file browser on the left-hand side.

15.1 Dictionary

person = {"name":"Vikrant",
          "email":"vikrant@pipalacademy", 
          "address":"Pune"}
person['name']
'Vikrant'
person['company']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[3], line 1
----> 1 person['company']

KeyError: 'company'
person.get("company", "arcesium")
'arcesium'
person
{'name': 'Vikrant', 'email': 'vikrant@pipalacademy', 'address': 'Pune'}
person.setdefault("company", "arcesium")
'arcesium'
person
{'name': 'Vikrant',
 'email': 'vikrant@pipalacademy',
 'address': 'Pune',
 'company': 'arcesium'}
for key in person:
    print(key)
name
email
address
company
for key, value in person.items():
    print(key, value)
name Vikrant
email vikrant@pipalacademy
address Pune
company arcesium
for value in person.values():
    print(value)
Vikrant
vikrant@pipalacademy
Pune
arcesium
list(person.keys())
['name', 'email', 'address', 'company']
list(person.values())
['Vikrant', 'vikrant@pipalacademy', 'Pune', 'arcesium']

15.1.1 Dictionary example

Some words are given in a file. We want to count the words, i.e. find how many times each word is repeated in the file.

%%file words.txt
two
five five
one two four
one two nine three
one four two seven two
four two five four seven two
two two three five five three two
two three two six two seven five seven
five seven two five nine six seven six seven
three five five six three four seven two one six
five two five seven two four four five one two two
six six nine two four seven five three five five four seven
one eight two one two three eight one four seven six two four
two four five five eight nine eight two four three six six two one
six two five one five one four two four one one ten three ten four
three two two five four three eight one nine three two nine nine seven five seven
seven five five six seven two three two two one four nine two seven seven five six
two five five one two two one five four five six one one five seven six five two
two six one five nine two two two five nine five nine five eight five four one seven one
Writing words.txt

steps

  task             input          output
- read the file    filename       contents-text
- get the words    contents       list of words
- counting         list of words  dictionary key is word value is count
- finalizing the answer: key will be word and count will be value

nums = [1, 2, 3, 4, 4, 5, 5, 56, 6, 7, 7, 78]
set(nums)
{1, 2, 3, 4, 5, 6, 7, 56, 78}
text = "one two three"
words = text.split()
words
['one', 'two', 'three']
multiline = """this is line one
yet another line
one more
and this is last"""
multiline.split()
['this',
 'is',
 'line',
 'one',
 'yet',
 'another',
 'line',
 'one',
 'more',
 'and',
 'this',
 'is',
 'last']
%%file x.csv
jkdhfasd
ksdhfklasd
kjhsdf kjsdfhjkasd  asdkjfh
kdjasfhjkds
Writing x.csv
with open("x.csv") as f:
    print(f.read())
jkdhfasd
ksdhfklasd
kjhsdf kjsdfhjkasd  asdkjfh
kdjasfhjkds
# Read the whole file and split it on whitespace to get a list of words.
# (The result is not displayed here because the expression sits inside the
# ``with`` block; loop over it or print it to see the words.)
with open("x.csv") as f:
    f.read().split()
   # File name (use the exact name as your file)
filename = "words.txt"
# Maps each word found in the file to the number of times it occurs.
word_count = {}

with open(filename, "r") as file:
    for line in file:
        # Count every word of every line as we go.  Splitting the line and
        # then counting a fixed word once per line would only count lines,
        # not words.
        for word in line.split():
            # get() supplies 0 the first time a word is seen.
            word_count[word] = word_count.get(word, 0) + 1

for word, count in word_count.items():
    print(f"{word}: {count}")
 
with open("x.csv") as f:
    for w in f.read().split():
        print(w)
jkdhfasd
ksdhfklasd
kjhsdf
kjsdfhjkasd
asdkjfh
kdjasfhjkds
with open("x.csv") as f:
    for line in f:
        print(line, end="")
jkdhfasd
ksdhfklasd
kjhsdf kjsdfhjkasd  asdkjfh
kdjasfhjkds
with open("x.csv") as f:
    for ch in f.read():
        print(ch)
j
k
d
h
f
a
s
d


k
s
d
h
f
k
l
a
s
d


k
j
h
s
d
f
 
k
j
s
d
f
h
j
k
a
s
d
 
 
a
s
d
k
j
f
h


k
d
j
a
s
f
h
j
k
d
s

What you iterate over is decided by what you put the loop on:
- a for loop on the filehandle gives lines
- a for loop on filehandle.read() gives characters
- a for loop on filehandle.read().split() gives words

steps

  task             input          output
- read the file    filename       contents-text
- get the words    contents       list of words
- counting         list of words  dictionary key is word value is count
  • finalizing the answer key will be word and count will be value
1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 2 2 2 2 2 2 2


observations
1 ++++++
2

def get_words(filename):
    """Return the whitespace-separated words of the file *filename* as a list."""
    with open(filename) as handle:
        contents = handle.read()
    # split() with no argument splits on any run of whitespace, so line
    # breaks and repeated spaces never produce empty words.
    return contents.split()

def word_count(words):
    """Count how many times each item occurs in *words*.

    Returns a dict whose keys are the distinct words (in first-seen order)
    and whose values are their counts.
    """
    tally = {}
    for word in words:
        # get() yields 0 for a word that has not been seen yet.
        tally[word] = tally.get(word, 0) + 1
    return tally
words = get_words("words.txt")
word_count(words)
{'two': 43,
 'five': 36,
 'one': 22,
 'four': 20,
 'nine': 11,
 'three': 14,
 'seven': 20,
 'six': 16,
 'eight': 6,
 'ten': 2}
def word_count(words):
    """Count occurrences of each distinct item in the list *words*.

    The distinct words are obtained with ``set`` and each one is counted
    with ``words.count``.  Returns a dict mapping word -> count.
    """
    tally = {}
    for distinct in set(words):
        tally[distinct] = words.count(distinct)
    return tally

def word_count(words):
    """Count occurrences of each distinct item in the list *words*.

    Returns a dict mapping each word to its count, with keys in the order
    the words first appear in *words*.
    """
    # dict.fromkeys() keeps one entry per distinct word in first-seen order,
    # so each word is counted exactly once instead of once per repetition.
    unique = dict.fromkeys(words)
    return {w: words.count(w) for w in unique}
word_count(words)
{'two': 43,
 'five': 36,
 'one': 22,
 'four': 20,
 'nine': 11,
 'three': 14,
 'seven': 20,
 'six': 16,
 'eight': 6,
 'ten': 2}

def word_count(words):
    """Count how many times each item occurs in *words* using ``setdefault``.

    Returns a dict mapping word -> count, keys in first-seen order.
    """
    tally = {}
    for word in words:
        # setdefault() inserts 0 only when the word is not yet a key;
        # an existing count is left untouched.
        tally.setdefault(word, 0)
        tally[word] += 1
    return tally
person.setdefault("company", "pipal academy")
'arcesium'
person
{'name': 'Vikrant',
 'email': 'vikrant@pipalacademy',
 'address': 'Pune',
 'company': 'arcesium'}
w = {"one":3}
w.setdefault("one", 0)
3
w.setdefault("two", 0)
0
w
{'one': 3, 'two': 0}
word_count(words)
{'two': 43,
 'five': 36,
 'one': 22,
 'four': 20,
 'nine': 11,
 'three': 14,
 'seven': 20,
 'six': 16,
 'eight': 6,
 'ten': 2}
wcount = word_count(words)
wcount
{'two': 43,
 'five': 36,
 'one': 22,
 'four': 20,
 'nine': 11,
 'three': 14,
 'seven': 20,
 'six': 16,
 'eight': 6,
 'ten': 2}
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value):
    print(w, c)
ten 2
eight 6
nine 11
three 14
six 16
four 20
seven 20
one 22
five 36
two 43
list(wcount.items())
[('two', 43),
 ('five', 36),
 ('one', 22),
 ('four', 20),
 ('nine', 11),
 ('three', 14),
 ('seven', 20),
 ('six', 16),
 ('eight', 6),
 ('ten', 2)]
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value, reverse=True):
    print(w, c)
two 43
five 36
one 22
four 20
seven 20
six 16
three 14
nine 11
eight 6
ten 2
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value, reverse=True):
    print(w.rjust(5), c)
  two 43
 five 36
  one 22
 four 20
seven 20
  six 16
three 14
 nine 11
eight 6
  ten 2
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value, reverse=True):
    print(w.rjust(5), c, "."*c)
  two 43 ...........................................
 five 36 ....................................
  one 22 ......................
 four 20 ....................
seven 20 ....................
  six 16 ................
three 14 ..............
 nine 11 ...........
eight 6 ......
  ten 2 ..
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value, reverse=True):
    print(f"{w.rjust(5)} {c:2d}", "."*c)
  two 43 ...........................................
 five 36 ....................................
  one 22 ......................
 four 20 ....................
seven 20 ....................
  six 16 ................
three 14 ..............
 nine 11 ...........
eight  6 ......
  ten  2 ..
def get_value(pair):
    """Return the second element of *pair*; used as a sort key for (word, count) items."""
    value = pair[1]
    return value
    
for w, c in sorted(wcount.items(), key=get_value, reverse=True):
    print(f"{w.rjust(5)} {c:2d}", "*"*c)
  two 43 *******************************************
 five 36 ************************************
  one 22 **********************
 four 20 ********************
seven 20 ********************
  six 16 ****************
three 14 **************
 nine 11 ***********
eight  6 ******
  ten  2 **
d = {"x": 1}
d['x'] = d['x'] + 1
d
{'x': 2}
d['x']
2
del d['x']
d['X'] = 2
d
{'X': 2}
d['X'] += 1

15.2 classification

%%file users.csv
UDNUB,Marathi
ITPSA,Marathi
XWKLP,Hindi
QQTAR,Hindi
ASRUF,Tamil
ZMMKL,Marathi
MICLK,Telugu
YDYMU,Tamil
ZIZLU,Marathi
UTNQU,Marathi
EMGOG,Tamil
XARIR,Gujrathi
DWFXB,Telugu
JMYAW,Tamil
SYTWW,Gujrathi
HEYDO,Telugu
FKYXN,Telugu
DVVBW,Tamil
VGLUP,Marathi
DUHQK,Tamil
LYUEB,Hindi
ZJZRT,Telugu
INIBW,Gujrathi
VQKLP,Telugu
TNYZD,Tamil
SMVQN,Hindi
BYSHD,Tamil
VADUG,Telugu
OLBOR,Gujrathi
DPEPH,Telugu
Writing users.csv
- read the file
- get the data as dictionary where user is key and language is value
- classification 
SMVQN,Hindi
BYSHD,Tamil
VADUG,Telugu
OLBOR,Gujrathi
DPEPH,Telugu
LYUEB,Hindi
ZJZRT,Telugu


SMVQN,Hindi
BYSHD,Tamil
VADUG,Telugu
OLBOR,Gujrathi
DPEPH,Telugu

classes = {"Hindi":['SMVQN'],
           "Tamil": ['BYSHD'],
            "Telugu" : ['VADUG', 'DPEPH'],
            "Gujrathi": ['OLBOR']}
def readpairs(filename):
    """Read ``user,language`` lines from *filename* into a dictionary.

    Each non-blank line must contain exactly one comma.  Returns a dict
    mapping user -> language; if a user appears more than once, the last
    line wins.  Raises ValueError for a non-blank line that does not split
    into exactly two fields.
    """
    pairs = {}
    with open(filename) as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank lines (e.g. an extra newline at the end of the file)
                # carry no data; skip them instead of failing on unpacking.
                continue
            user, language = record.split(",")
            pairs[user] = language
    return pairs
    
def classify(filename):
    """Placeholder: the grouping logic is not written yet, so this returns None."""
    return None
readpairs("users.csv")
{'UDNUB': 'Marathi',
 'ITPSA': 'Marathi',
 'XWKLP': 'Hindi',
 'QQTAR': 'Hindi',
 'ASRUF': 'Tamil',
 'ZMMKL': 'Marathi',
 'MICLK': 'Telugu',
 'YDYMU': 'Tamil',
 'ZIZLU': 'Marathi',
 'UTNQU': 'Marathi',
 'EMGOG': 'Tamil',
 'XARIR': 'Gujrathi',
 'DWFXB': 'Telugu',
 'JMYAW': 'Tamil',
 'SYTWW': 'Gujrathi',
 'HEYDO': 'Telugu',
 'FKYXN': 'Telugu',
 'DVVBW': 'Tamil',
 'VGLUP': 'Marathi',
 'DUHQK': 'Tamil',
 'LYUEB': 'Hindi',
 'ZJZRT': 'Telugu',
 'INIBW': 'Gujrathi',
 'VQKLP': 'Telugu',
 'TNYZD': 'Tamil',
 'SMVQN': 'Hindi',
 'BYSHD': 'Tamil',
 'VADUG': 'Telugu',
 'OLBOR': 'Gujrathi',
 'DPEPH': 'Telugu'}
def readpairs(filename):
    """Read ``user,language`` lines from *filename* into a dictionary.

    Each non-blank line must contain exactly one comma.  Returns a dict
    mapping user -> language; if a user appears more than once, the last
    line wins.  Raises ValueError for a non-blank line that does not split
    into exactly two fields.
    """
    pairs = {}
    with open(filename) as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank lines (e.g. an extra newline at the end of the file)
                # carry no data; skip them instead of failing on unpacking.
                continue
            user, language = record.split(",")
            pairs[user] = language
    return pairs
    
def classify(filename):
    """Group the users listed in *filename* by their language.

    Reads the user -> language pairs with ``readpairs`` and returns a dict
    mapping each language to the list of its users, in the order the pairs
    are iterated.
    """
    by_language = {}
    for user, language in readpairs(filename).items():
        # setdefault() creates the empty list the first time a language is
        # seen and returns the existing list afterwards.
        members = by_language.setdefault(language, [])
        members.append(user)
    return by_language
        
classify("users.csv")
{'Marathi': ['UDNUB', 'ITPSA', 'ZMMKL', 'ZIZLU', 'UTNQU', 'VGLUP'],
 'Hindi': ['XWKLP', 'QQTAR', 'LYUEB', 'SMVQN'],
 'Tamil': ['ASRUF',
  'YDYMU',
  'EMGOG',
  'JMYAW',
  'DVVBW',
  'DUHQK',
  'TNYZD',
  'BYSHD'],
 'Telugu': ['MICLK',
  'DWFXB',
  'HEYDO',
  'FKYXN',
  'ZJZRT',
  'VQKLP',
  'VADUG',
  'DPEPH'],
 'Gujrathi': ['XARIR', 'SYTWW', 'INIBW', 'OLBOR']}
for k, v in classify("users.csv").items():
    print(k, ":", ",".join(v))
Marathi : UDNUB,ITPSA,ZMMKL,ZIZLU,UTNQU,VGLUP
Hindi : XWKLP,QQTAR,LYUEB,SMVQN
Tamil : ASRUF,YDYMU,EMGOG,JMYAW,DVVBW,DUHQK,TNYZD,BYSHD
Telugu : MICLK,DWFXB,HEYDO,FKYXN,ZJZRT,VQKLP,VADUG,DPEPH
Gujrathi : XARIR,SYTWW,INIBW,OLBOR

15.2.1 Creating dictionaries using dictionary comprehension

names = ['APPLE', 'IBM', 'AT&T', 'AGILENT']
values = [700.5, 300.1, 355.7, 600.3]
{x:y for x, y in zip(names, values)}
{'APPLE': 700.5, 'IBM': 300.1, 'AT&T': 355.7, 'AGILENT': 600.3}
dict(zip(names, values))
{'APPLE': 700.5, 'IBM': 300.1, 'AT&T': 355.7, 'AGILENT': 600.3}
stocks = dict(zip(names, values))
{k:v for k,v in stocks.items() if v > 400}
{'APPLE': 700.5, 'AGILENT': 600.3}
{k:v for k,v in stocks.items() if k.startswith("A")}
{'APPLE': 700.5, 'AT&T': 355.7, 'AGILENT': 600.3}