import sqlite321 Module 3 - Day 4
Topics
- Connecting to databases (sqlite3 )
- database connectors
- connecting and executing queries
- Pitfalls in reading/writing to databases.
For today’s practice make use of notebook module3-day4.ipynb created in your enviroment. Shut down kernel for all previous notebooks (if in runing condition) by right cliking on notbeook on left hand side file browser
21.1 Working with databases
- You will need a python connector for your database
- Connector is different for each type of database. e.g. mysql provides its connector on the website. Sqilite3 connectors comes with python
- How do you connect to database will be different.
- But working with data, executing queries, fetching or adding data to databses these will be same
conn = sqlite3.connect("training.db") # if the database file does not exits, it will create it
cur = conn.cursor()
cur.execute("create table person (name varchar(100), email varchar(100));")cur.execute("insert into person (name, email) values ('alice', 'alice@wonder.land');")
conn.commit()
conn.close()!ls training.dbtraining.db
21.2 Access data
conn = sqlite3.connect("training.db") # this open the database cur = conn.cursor() # this will work only if conn is openresults = cur.execute("select * from person") # this means select every field from person tableresults # it is acually kind of iterator<sqlite3.Cursor at 0x7f4f48ed2dc0>
nums = [1, 2, 3, 4, 5, 6]rnums = reversed(nums)rnums<list_reverseiterator at 0x7f4f48b61540>
next(rnums)6
results.fetchall() # usually data fetched from execute query will be a list of tuples[('alice', 'alice@wonder.land')]
names = cur.execute("select name from person")names.fetchall()[('alice',)]
def find_person(conn, email):
q = f"select * from person where email='{email}'" # this is not a good way
print(q)
cur = conn.cursor()
r = cur.execute(q)
return r.fetchall()find_person(conn, "alice@wonder.land")select * from person where email='alice@wonder.land'
[('alice', 'alice@wonder.land')]
cur = conn.cursor()
r = cur.execute("select * from person where email=?", ("alice@wonder.land", ))r.fetchall()[('alice', 'alice@wonder.land')]
def find_person_with_email(conn, email):
q = f"select * from person where email=?"
print(q)
cur = conn.cursor()
r = cur.execute(q, (email,))
return r.fetchall()[1][1]
(1) # this is not tuple1
(1,) # this is tuple(1,)
t = (1,)t[0]1
find_person_with_email(conn, "alice@wonder.land")select * from person where email=?
[('alice', 'alice@wonder.land')]
def query(conn, querystring, params=None):
if params is None:
params = tuple()
cur = conn.cursor()
r = cur.execute(querystring, params)
return r.fetchall()query(conn, "insert into person (name, email) values (?,?)", ("alex", "alex@nyk.zoo"))[]
find_person_with_email(conn, "alex@nyk.zoo")select * from person where email=?
[('alex', 'alex@nyk.zoo')]
persons = {"xyz":"xyz@xyz.com",
"abc": "abc@abc.com",
"mno": "mno@mno.com"}def insert_person(conn, name, email):
return query(conn, "insert into person (name, email) values (?,?)", (name, email))for n, e in persons.items():
insert_person(conn, n, e)query(conn, "select * from person")[('alice', 'alice@wonder.land'),
('alex', 'alex@nyk.zoo'),
('xyz', 'xyz@xyz.com'),
('abc', 'abc@abc.com'),
('mno', 'mno@mno.com')]
21.3 Pandas and databases
import pandas as pdcsvlink = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
wallet = pd.read_csv(csvlink)wallet| Unnamed: 0 | date | category | description | debit | |
|---|---|---|---|---|---|
| 0 | 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... | ... |
| 95 | 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 5 columns
del wallet['Unnamed: 0']wallet| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
wallet.to_csv("/tmp/x.csv")!head /tmp/x.csv,date,category,description,debit
0,2021-03-07 14:53:28.377359,Music,Amazon,421.2073272347991
1,2020-10-08 09:53:28.377359,Food,Swiggy,328.4400802428426
2,2021-02-23 09:53:28.377359,Books,Amazon,244.67943701511356
3,2020-11-01 14:53:28.377359,Utility,Phone,222.7563175805277
4,2021-06-05 13:53:28.377359,Books,Flipcart,494.1284923793595
5,2021-07-28 19:53:28.377359,Utility,Electricity,219.94171130968408
6,2021-04-16 11:53:28.377359,Books,Amazon Kindle,270.32259514795845
7,2021-02-15 10:53:28.377359,Food,Zomato,457.1831036346536
8,2021-08-10 19:53:28.377359,Utility,Phone,151.49637259947792
wallet.to_sql("exepenses", con=conn, if_exists="replace")100
r = query(conn, "select * from exepenses")for item in r:expressions
print(item)(0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991)
(1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426)
(2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356)
(3, '2020-11-01 14:53:28.377359', 'Utility', 'Phone', 222.7563175805277)
(4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595)
(5, '2021-07-28 19:53:28.377359', 'Utility', 'Electricity', 219.94171130968408)
(6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845)
(7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536)
(8, '2021-08-10 19:53:28.377359', 'Utility', 'Phone', 151.49637259947792)
(9, '2020-11-29 14:53:28.377359', 'Travel', 'Auto', 443.61888423247854)
(10, '2021-06-15 13:53:28.377359', 'Travel', 'Metro', 328.1754210974373)
(11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444)
(12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569)
(13, '2021-06-06 10:53:28.377359', 'Utility', 'Phone', 154.0449491816659)
(14, '2021-06-09 13:53:28.377359', 'Travel', 'Taxi', 485.2977429821982)
(15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398)
(16, '2021-03-05 19:53:28.377359', 'Utility', 'Phone', 390.31687619327926)
(17, '2021-04-17 18:53:28.377359', 'Utility', 'Electricity', 316.8786754246636)
(18, '2021-05-08 15:53:28.377359', 'Travel', 'Auto', 433.8224042777936)
(19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068)
(20, '2020-10-12 18:53:28.377359', 'Travel', 'Auto', 365.9218082537661)
(21, '2021-01-04 19:53:28.377359', 'Travel', 'Metro', 329.09737150258513)
(22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.1434830522253)
(23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919816)
(24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.10049550461065)
(25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772)
(26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854)
(27, '2021-05-01 15:53:28.377359', 'Utility', 'Electricity', 103.68079074846584)
(28, '2020-12-14 15:53:28.377359', 'Utility', 'Phone', 358.4599327957656)
(29, '2021-06-20 10:53:28.377359', 'Utility', 'Electricity', 184.5577284049955)
(30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.5292397894327)
(31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528)
(32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.03043608141513)
(33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743)
(34, '2021-05-14 10:53:28.377359', 'Utility', 'Phone', 499.8581815222449)
(35, '2021-02-18 18:53:28.377359', 'Travel', 'Metro', 441.6021430011205)
(36, '2020-12-10 10:53:28.377359', 'Travel', 'Auto', 472.94143917262176)
(37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673)
(38, '2021-08-15 10:53:28.377359', 'Travel', 'Auto', 494.1243994056571)
(39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457)
(40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536)
(41, '2021-02-20 19:53:28.377359', 'Utility', 'Phone', 425.18719068071806)
(42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.2562695007857)
(43, '2020-09-21 12:53:28.377359', 'Utility', 'Phone', 486.0339327616073)
(44, '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', 257.92759337085425)
(45, '2021-05-27 16:53:28.377359', 'Utility', 'Electricity', 154.74287259516657)
(46, '2021-05-15 15:53:28.377359', 'Utility', 'Electricity', 359.3249716537848)
(47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679)
(48, '2021-08-23 17:53:28.377359', 'Utility', 'Electricity', 310.05840961423314)
(49, '2021-03-16 09:53:28.377359', 'Music', 'spotify', 232.30340219121135)
(50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554)
(51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093)
(52, '2021-03-26 09:53:28.377359', 'Travel', 'Taxi', 403.6100701341934)
(53, '2021-01-27 09:53:28.377359', 'Utility', 'Electricity', 183.1866624101276)
(54, '2020-11-16 10:53:28.377359', 'Music', 'spotify', 160.81754340768396)
(55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553)
(56, '2021-05-19 18:53:28.377359', 'Utility', 'Phone', 319.3428762684619)
(57, '2021-07-15 15:53:28.377359', 'Utility', 'Phone', 279.6090437716363)
(58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312)
(59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039)
(60, '2021-01-17 11:53:28.377359', 'Utility', 'Electricity', 382.5195101154448)
(61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062)
(62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023)
(63, '2021-05-10 11:53:28.377359', 'Travel', 'Taxi', 355.9890502253258)
(64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789)
(65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118)
(66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145)
(67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731)
(68, '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', 279.1478844739421)
(69, '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', 300.52462041935115)
(70, '2021-08-17 13:53:28.377359', 'Utility', 'Phone', 125.22977317126336)
(71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904)
(72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488)
(73, '2021-08-15 17:53:28.377359', 'Travel', 'Metro', 117.58872931045572)
(74, '2021-03-20 11:53:28.377359', 'Travel', 'Taxi', 303.05542098520453)
(75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.6252909948148)
(76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895)
(77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321)
(78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302)
(79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651)
(80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764)
(81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.08099539923614)
(82, '2020-09-22 09:53:28.377359', 'Music', 'spotify', 411.14270120842224)
(83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999)
(84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333)
(85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527)
(86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.450671792638)
(87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473)
(88, '2021-04-18 09:53:28.377359', 'Utility', 'Phone', 124.2212478444578)
(89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263)
(90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)
(91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223)
(92, '2021-01-12 19:53:28.377359', 'Travel', 'Taxi', 356.8426379886326)
(93, '2021-01-11 09:53:28.377359', 'Utility', 'Electricity', 111.72080867898062)
(94, '2021-01-04 13:53:28.377359', 'Utility', 'Phone', 431.1855366816298)
(95, '2021-07-19 13:53:28.377359', 'Utility', 'Phone', 388.6712132388421)
(96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052)
(97, '2021-03-25 11:53:28.377359', 'Utility', 'Phone', 320.78943360123816)
(98, '2021-05-13 15:53:28.377359', 'Travel', 'Taxi', 442.0964693975505)
(99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)
def mysum(*nums): # nums is variable number of unamed argumets
s = 0
for n in nums:
s += n
return smysum(1, 2)3
mysum(1, 2, 3, 4, 5, 6, 7, 7, 8) # these are unnamed arguments43
def foo(**kwargs): # ** means named arguments
for name, value in kwargs.items():
print(name, value)foo(name="alice", email="alice@wonder.land", place="wanderland") # variable number named argumentsname alice
email alice@wonder.land
place wanderland
def select_query(conn, tablename, *selection, **conditions):
cur = conn.cursor()
SELECTION = ",".join(selection)
CONDITIONS = " & ".join([f"{name} = ?" for name in conditions.keys()])
params = tuple([conditions[name] for name in conditions.keys()])
q = f"select {SELECTION} from {tablename} where {CONDITIONS}"
print(q)
print(params)
return cur.execute(q, params)select a, b, c, d from tablename where cond1=value and cond2 = value2...wallet| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
music = select_query(conn, "exepenses", "description", "debit", category="Music")select description,debit from exepenses where category = ?
('Music',)
for items in music:
print(items)('Amazon', 421.2073272347991)
('Amazon', 329.5360031897569)
('Netflix', 354.9402409919816)
('Amazon', 266.0690783774673)
('spotify', 232.30340219121135)
('spotify', 160.81754340768396)
('Netflix', 188.7487426895118)
('Netflix', 324.786916846731)
('Netflix', 197.5346000167895)
('spotify', 415.3728938035302)
('Netflix', 321.7634156544651)
('spotify', 411.14270120842224)
('Netflix', 158.7936457269333)
('Amazon', 130.37490757527)
('Amazon', 218.487173429263)
('Amazon', 101.57327588889416)
df = pd.read_sql_query("select * from exepenses;", con=conn)df| index | date | category | description | debit | |
|---|---|---|---|---|---|
| 0 | 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... | ... |
| 95 | 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 5 columns
!ls upload.pdfupload.pdf
21.4 Regular expressions
Regular are used to search patterns in text!
import reempty_pattern = re.compile("^$") # empty line ... ^ means start of line, $ means end of line
lines_with_sigle_char = re.compile("^.$") # . means any char
date_like_pattern = re.compile(r"^\d\d\d\d-\d{1,2}-\d{1,2}") # \d is for digit,
#{x, y} whatever is before this will come min x times and max y timeslines="""line1
1
2
3
ksjdksa dfjkh fdgjkh
2026-2-14
2025-12-12
sadas
fsdfsf"""def matchpatterns(lines, p):
for line in lines.split("\n"):
if p.match(line):
print("Found a patterns!")
print(line)matchpatterns(lines, empty_pattern)Found a patterns!
Found a patterns!
Found a patterns!
Found a patterns!
Found a patterns!
matchpatterns(lines, lines_with_sigle_char)Found a patterns!
1
Found a patterns!
2
Found a patterns!
3
matchpatterns(lines, date_like_pattern)Found a patterns!
2026-2-14
Found a patterns!
2025-12-12
21.5 activating virtual env through .bat file
%%file jupyler-launch.bat
call c:\usrs\vikrant\trainingenv\Scripts\activate.bat
jupyterlabOverwriting jupyler-launch.bat