21  Module 3 - Day 4

Topics

For today’s practice make use of notebook module3-day4.ipynb created in your enviroment. Shut down kernel for all previous notebooks (if in runing condition) by right cliking on notbeook on left hand side file browser

21.1 Working with databases

  • You will need a python connector for your database
  • Connector is different for each type of database. e.g. mysql provides its connector on the website. Sqilite3 connectors comes with python
  • How do you connect to database will be different.
  • But working with data, executing queries, fetching or adding data to databses these will be same
import sqlite3
conn = sqlite3.connect("training.db") # if the database file does not exits, it will create it
cur = conn.cursor()
cur.execute("create table person (name varchar(100), email varchar(100));")
cur.execute("insert into person (name, email) values ('alice', 'alice@wonder.land');")
conn.commit()
conn.close()
!ls training.db
training.db

21.2 Access data

conn = sqlite3.connect("training.db") # this open the database 
cur = conn.cursor() # this will work only if conn is open
results = cur.execute("select * from person") # this means select every field from person table
results # it is acually kind of iterator
<sqlite3.Cursor at 0x7f4f48ed2dc0>
nums = [1, 2, 3, 4, 5, 6]
rnums = reversed(nums)
rnums
<list_reverseiterator at 0x7f4f48b61540>
next(rnums)
6
results.fetchall() # usually data fetched from execute query will be a list of tuples
[('alice', 'alice@wonder.land')]
names = cur.execute("select name from person")
names.fetchall()
[('alice',)]
def find_person(conn, email):
    q = f"select * from person where email='{email}'" # this is not a good way
    print(q)
    cur = conn.cursor()
    r = cur.execute(q)
    return r.fetchall()
find_person(conn, "alice@wonder.land")
select * from person where email='alice@wonder.land'
[('alice', 'alice@wonder.land')]
cur = conn.cursor()
r = cur.execute("select * from person where email=?", ("alice@wonder.land", ))
r.fetchall()
[('alice', 'alice@wonder.land')]
def find_person_with_email(conn, email):
    q = f"select * from person where email=?"
    print(q)
    cur = conn.cursor()
    r = cur.execute(q, (email,))
    return r.fetchall()
[1]
[1]
(1) # this is not tuple
1
(1,) # this is tuple
(1,)
t = (1,)
t[0]
1
find_person_with_email(conn, "alice@wonder.land")
select * from person where email=?
[('alice', 'alice@wonder.land')]
def query(conn, querystring, params=None):
    if params is None:
        params = tuple()
    cur = conn.cursor()
    r = cur.execute(querystring, params)
    return r.fetchall()
query(conn, "insert into person (name, email) values (?,?)", ("alex", "alex@nyk.zoo"))
[]
find_person_with_email(conn, "alex@nyk.zoo")
select * from person where email=?
[('alex', 'alex@nyk.zoo')]
persons = {"xyz":"xyz@xyz.com",
          "abc": "abc@abc.com",
          "mno": "mno@mno.com"}
def insert_person(conn, name, email):
    return query(conn, "insert into person (name, email) values (?,?)", (name, email))
for n, e in persons.items():
    insert_person(conn, n, e)
query(conn, "select * from person")
[('alice', 'alice@wonder.land'),
 ('alex', 'alex@nyk.zoo'),
 ('xyz', 'xyz@xyz.com'),
 ('abc', 'abc@abc.com'),
 ('mno', 'mno@mno.com')]

21.3 Pandas and databases

import pandas as pd
csvlink = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
wallet = pd.read_csv(csvlink)
wallet
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

del wallet['Unnamed: 0']
wallet
date category description debit
0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ...
95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 4 columns

wallet.to_csv("/tmp/x.csv")
!head /tmp/x.csv
,date,category,description,debit
0,2021-03-07 14:53:28.377359,Music,Amazon,421.2073272347991
1,2020-10-08 09:53:28.377359,Food,Swiggy,328.4400802428426
2,2021-02-23 09:53:28.377359,Books,Amazon,244.67943701511356
3,2020-11-01 14:53:28.377359,Utility,Phone,222.7563175805277
4,2021-06-05 13:53:28.377359,Books,Flipcart,494.1284923793595
5,2021-07-28 19:53:28.377359,Utility,Electricity,219.94171130968408
6,2021-04-16 11:53:28.377359,Books,Amazon Kindle,270.32259514795845
7,2021-02-15 10:53:28.377359,Food,Zomato,457.1831036346536
8,2021-08-10 19:53:28.377359,Utility,Phone,151.49637259947792
wallet.to_sql("exepenses", con=conn, if_exists="replace")
100
r = query(conn, "select * from exepenses")
for item in r:expressions
    print(item)
(0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991)
(1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426)
(2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356)
(3, '2020-11-01 14:53:28.377359', 'Utility', 'Phone', 222.7563175805277)
(4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595)
(5, '2021-07-28 19:53:28.377359', 'Utility', 'Electricity', 219.94171130968408)
(6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845)
(7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536)
(8, '2021-08-10 19:53:28.377359', 'Utility', 'Phone', 151.49637259947792)
(9, '2020-11-29 14:53:28.377359', 'Travel', 'Auto', 443.61888423247854)
(10, '2021-06-15 13:53:28.377359', 'Travel', 'Metro', 328.1754210974373)
(11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444)
(12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569)
(13, '2021-06-06 10:53:28.377359', 'Utility', 'Phone', 154.0449491816659)
(14, '2021-06-09 13:53:28.377359', 'Travel', 'Taxi', 485.2977429821982)
(15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398)
(16, '2021-03-05 19:53:28.377359', 'Utility', 'Phone', 390.31687619327926)
(17, '2021-04-17 18:53:28.377359', 'Utility', 'Electricity', 316.8786754246636)
(18, '2021-05-08 15:53:28.377359', 'Travel', 'Auto', 433.8224042777936)
(19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068)
(20, '2020-10-12 18:53:28.377359', 'Travel', 'Auto', 365.9218082537661)
(21, '2021-01-04 19:53:28.377359', 'Travel', 'Metro', 329.09737150258513)
(22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.1434830522253)
(23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919816)
(24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.10049550461065)
(25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772)
(26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854)
(27, '2021-05-01 15:53:28.377359', 'Utility', 'Electricity', 103.68079074846584)
(28, '2020-12-14 15:53:28.377359', 'Utility', 'Phone', 358.4599327957656)
(29, '2021-06-20 10:53:28.377359', 'Utility', 'Electricity', 184.5577284049955)
(30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.5292397894327)
(31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528)
(32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.03043608141513)
(33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743)
(34, '2021-05-14 10:53:28.377359', 'Utility', 'Phone', 499.8581815222449)
(35, '2021-02-18 18:53:28.377359', 'Travel', 'Metro', 441.6021430011205)
(36, '2020-12-10 10:53:28.377359', 'Travel', 'Auto', 472.94143917262176)
(37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673)
(38, '2021-08-15 10:53:28.377359', 'Travel', 'Auto', 494.1243994056571)
(39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457)
(40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536)
(41, '2021-02-20 19:53:28.377359', 'Utility', 'Phone', 425.18719068071806)
(42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.2562695007857)
(43, '2020-09-21 12:53:28.377359', 'Utility', 'Phone', 486.0339327616073)
(44, '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', 257.92759337085425)
(45, '2021-05-27 16:53:28.377359', 'Utility', 'Electricity', 154.74287259516657)
(46, '2021-05-15 15:53:28.377359', 'Utility', 'Electricity', 359.3249716537848)
(47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679)
(48, '2021-08-23 17:53:28.377359', 'Utility', 'Electricity', 310.05840961423314)
(49, '2021-03-16 09:53:28.377359', 'Music', 'spotify', 232.30340219121135)
(50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554)
(51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093)
(52, '2021-03-26 09:53:28.377359', 'Travel', 'Taxi', 403.6100701341934)
(53, '2021-01-27 09:53:28.377359', 'Utility', 'Electricity', 183.1866624101276)
(54, '2020-11-16 10:53:28.377359', 'Music', 'spotify', 160.81754340768396)
(55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553)
(56, '2021-05-19 18:53:28.377359', 'Utility', 'Phone', 319.3428762684619)
(57, '2021-07-15 15:53:28.377359', 'Utility', 'Phone', 279.6090437716363)
(58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312)
(59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039)
(60, '2021-01-17 11:53:28.377359', 'Utility', 'Electricity', 382.5195101154448)
(61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062)
(62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023)
(63, '2021-05-10 11:53:28.377359', 'Travel', 'Taxi', 355.9890502253258)
(64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789)
(65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118)
(66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145)
(67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731)
(68, '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', 279.1478844739421)
(69, '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', 300.52462041935115)
(70, '2021-08-17 13:53:28.377359', 'Utility', 'Phone', 125.22977317126336)
(71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904)
(72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488)
(73, '2021-08-15 17:53:28.377359', 'Travel', 'Metro', 117.58872931045572)
(74, '2021-03-20 11:53:28.377359', 'Travel', 'Taxi', 303.05542098520453)
(75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.6252909948148)
(76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895)
(77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321)
(78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302)
(79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651)
(80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764)
(81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.08099539923614)
(82, '2020-09-22 09:53:28.377359', 'Music', 'spotify', 411.14270120842224)
(83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999)
(84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333)
(85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527)
(86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.450671792638)
(87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473)
(88, '2021-04-18 09:53:28.377359', 'Utility', 'Phone', 124.2212478444578)
(89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263)
(90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)
(91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223)
(92, '2021-01-12 19:53:28.377359', 'Travel', 'Taxi', 356.8426379886326)
(93, '2021-01-11 09:53:28.377359', 'Utility', 'Electricity', 111.72080867898062)
(94, '2021-01-04 13:53:28.377359', 'Utility', 'Phone', 431.1855366816298)
(95, '2021-07-19 13:53:28.377359', 'Utility', 'Phone', 388.6712132388421)
(96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052)
(97, '2021-03-25 11:53:28.377359', 'Utility', 'Phone', 320.78943360123816)
(98, '2021-05-13 15:53:28.377359', 'Travel', 'Taxi', 442.0964693975505)
(99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)
def mysum(*nums): # nums is variable number of unamed argumets
    s = 0
    for n in nums:
        s += n
    return s
mysum(1, 2)
3
mysum(1, 2, 3, 4, 5, 6, 7, 7, 8) # these are unnamed arguments
43
def foo(**kwargs): # ** means named arguments
    for name, value in kwargs.items():
        print(name, value)
foo(name="alice", email="alice@wonder.land", place="wanderland") # variable number named arguments
name alice
email alice@wonder.land
place wanderland
def select_query(conn, tablename, *selection, **conditions):
    cur = conn.cursor()
    SELECTION = ",".join(selection)
    CONDITIONS = " & ".join([f"{name} = ?" for name in conditions.keys()])
    params = tuple([conditions[name] for name in conditions.keys()])
    q = f"select {SELECTION} from {tablename} where {CONDITIONS}"
    print(q)
    print(params)
    return cur.execute(q, params)
select a, b, c, d from tablename where cond1=value and cond2 = value2...
wallet
date category description debit
0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ...
95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 4 columns

music = select_query(conn, "exepenses", "description", "debit", category="Music")
select description,debit from exepenses where category = ?
('Music',)
for items in music:
    print(items)
('Amazon', 421.2073272347991)
('Amazon', 329.5360031897569)
('Netflix', 354.9402409919816)
('Amazon', 266.0690783774673)
('spotify', 232.30340219121135)
('spotify', 160.81754340768396)
('Netflix', 188.7487426895118)
('Netflix', 324.786916846731)
('Netflix', 197.5346000167895)
('spotify', 415.3728938035302)
('Netflix', 321.7634156544651)
('spotify', 411.14270120842224)
('Netflix', 158.7936457269333)
('Amazon', 130.37490757527)
('Amazon', 218.487173429263)
('Amazon', 101.57327588889416)
df = pd.read_sql_query("select * from exepenses;", con=conn)
df
index date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

!ls upload.pdf
upload.pdf

21.4 Regular expressions

Regular are used to search patterns in text!

import re
empty_pattern = re.compile("^$") # empty line ... ^ means start of line, $ means end of line
lines_with_sigle_char = re.compile("^.$") # . means any char
date_like_pattern = re.compile(r"^\d\d\d\d-\d{1,2}-\d{1,2}") # \d is for digit, 
                                                            #{x, y} whatever is before this will come min x times and max y times
lines="""line1
1
2
3
ksjdksa dfjkh fdgjkh


2026-2-14
2025-12-12
sadas



fsdfsf"""
def matchpatterns(lines, p):
    for line in lines.split("\n"):
        if p.match(line):
            print("Found a patterns!")
            print(line)
matchpatterns(lines, empty_pattern)
Found a patterns!

Found a patterns!

Found a patterns!

Found a patterns!

Found a patterns!
matchpatterns(lines, lines_with_sigle_char)
Found a patterns!
1
Found a patterns!
2
Found a patterns!
3
matchpatterns(lines, date_like_pattern)
Found a patterns!
2026-2-14
Found a patterns!
2025-12-12

21.5 activating virtual env through .bat file

%%file jupyler-launch.bat
call c:\usrs\vikrant\trainingenv\Scripts\activate.bat
jupyterlab
Overwriting jupyler-launch.bat