import csv16 Module 2 - Day 5
Topics
- Working with CSV files
- Using built in module to read and write CSV file
- Reading/Writing Excel Files with xlsxwriter
- Introduction to
xlsxwriter, writing excel files
For today’s practice make use of notebook module2-day5.ipynb created in your enviroment. Shut down kernel for all previous notebooks (if in runing condition) by right cliking on notbeook on left hand side file browser
16.1 built in csv module
%%file stocks.csv
Symbol,Price,Date,Time,Change,Volume
"AA",39.48,"6/11/2007","9:36am",-0.18,181800
"AIG",71.38,"6/11/2007","9:36am",-0.15,195500
"AXP",62.58,"6/11/2007","9:36am",-0.46,935000
"BA",98.31,"6/11/2007","9:36am",+0.12,104800
"C",53.08,"6/11/2007","9:36am",-0.25,360900
"CAT",78.29,"6/11/2007","9:36am",-0.23,225400Overwriting stocks.csv
import csv
with open("stocks.csv") as f:
csvhandle = csv.reader(f) # this does not consider name of columns
for row in csvhandle:
print(row)['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
['AA', '39.48', '6/11/2007', '9:36am', '-0.18', '181800']
['AIG', '71.38', '6/11/2007', '9:36am', '-0.15', '195500']
['AXP', '62.58', '6/11/2007', '9:36am', '-0.46', '935000']
['BA', '98.31', '6/11/2007', '9:36am', '+0.12', '104800']
['C', '53.08', '6/11/2007', '9:36am', '-0.25', '360900']
['CAT', '78.29', '6/11/2007', '9:36am', '-0.23', '225400']
"1,2,3,5".split(",")['1', '2', '3', '5']
'"col A","col B",C,D'.split(",")['"col A"', '"col B"', 'C', 'D']
fields = [("Price", float),
("Change", float),
("Volume", int)]
def read_csv(filename, fields):
with open(filename) as f:
csvhandle = csv.DictReader(f)
for row in csvhandle:
print(row)
read_csv("stocks.csv", fields){'Symbol': 'AA', 'Price': '39.48', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '-0.18', 'Volume': '181800'}
{'Symbol': 'AIG', 'Price': '71.38', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '-0.15', 'Volume': '195500'}
{'Symbol': 'AXP', 'Price': '62.58', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '-0.46', 'Volume': '935000'}
{'Symbol': 'BA', 'Price': '98.31', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '+0.12', 'Volume': '104800'}
{'Symbol': 'C', 'Price': '53.08', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '-0.25', 'Volume': '360900'}
{'Symbol': 'CAT', 'Price': '78.29', 'Date': '6/11/2007', 'Time': '9:36am', 'Change': '-0.23', 'Volume': '225400'}
fields = [("Price", float),
("Change", float),
("Volume", int)]
def read_csv(filename, fields):
with open(filename) as f:
csvhandle = csv.DictReader(f)
data = []
for row in csvhandle:
row.update({colname: convert(row[colname]) for colname, convert in fields})
data.append(row)
return data
read_csv("stocks.csv", fields)[{'Symbol': 'AA',
'Price': 39.48,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': -0.18,
'Volume': 181800},
{'Symbol': 'AIG',
'Price': 71.38,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': -0.15,
'Volume': 195500},
{'Symbol': 'AXP',
'Price': 62.58,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': -0.46,
'Volume': 935000},
{'Symbol': 'BA',
'Price': 98.31,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': 0.12,
'Volume': 104800},
{'Symbol': 'C',
'Price': 53.08,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': -0.25,
'Volume': 360900},
{'Symbol': 'CAT',
'Price': 78.29,
'Date': '6/11/2007',
'Time': '9:36am',
'Change': -0.23,
'Volume': 225400}]
fields = [("Price", float),
("Change", float),
("Volume", int)]
def get_column(filename, columnname):
data = read_csv(filename, fields)
return [row[columnname] for row in data]get_column("stocks.csv", "Volume")[181800, 195500, 935000, 104800, 360900, 225400]
%%file indexdata.csv
symbol,price,change,volume
XFNBI,154.27522023650917,2.871245637237712,3926
ORGHT,117.97670217978481,4.539536223184663,586
VEBHV,113.3520799776703,0.6635982458596046,3688
BYRIK,104.2138293580797,2.626015334200588,1767
VCRRD,155.43160467760868,0.4681719455962513,641
PEQVY,181.21666821143089,0.42729711749318167,2738
LQTQZ,168.33573338890363,3.521911607612079,2670
HPDAK,155.96008397206708,1.7239221076463696,4259
XLHUI,174.46964754068694,3.745170925696039,2770
JEAZX,148.7162928469953,2.6624961725339205,1614
QOZAM,192.1176069819026,4.139241768741275,726
XJAHU,150.20434996488757,3.1065263183864253,2688
VLXDC,178.73486584779386,3.8678532141202036,4103
PJQEK,199.0461185921472,3.2096859326164178,1592
DWJDV,148.7534049268761,3.331681968931371,2177
ORJMD,174.11752806601532,3.180221402115432,909
DLJGR,117.33043838185898,0.34686211777134657,4092
YCHHM,144.8746810221045,3.3530551951924155,1571
WFNNB,147.65332106681385,3.7760281088943017,3195
YNRZG,128.86957732022677,4.5699186134962435,2713Overwriting indexdata.csv
fields = [("price", float),
("change", float),
("volume", int)]
def get_column(filename, fields , columnname):
data = read_csv(filename, fields)
return [row[columnname] for row in data]get_column("indexdata.csv", fields, "change")[2.871245637237712,
4.539536223184663,
0.6635982458596046,
2.626015334200588,
0.4681719455962513,
0.42729711749318167,
3.521911607612079,
1.7239221076463696,
3.745170925696039,
2.6624961725339205,
4.139241768741275,
3.1065263183864253,
3.8678532141202036,
3.2096859326164178,
3.331681968931371,
3.180221402115432,
0.34686211777134657,
3.3530551951924155,
3.7760281088943017,
4.5699186134962435]
16.2 Writing excel files
We will use thrid party module xlsxwriter. Third party modules or libraries do not come with python basic installation. to install any third party library we make use of pip
pip install xlsxwriter
!pip install xlsxwriterDefaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: xlsxwriter in /opt/tljh/user/lib/python3.12/site-packages (3.2.9)
import xlsxwriter16.2.1 creating excel file and worksheet
import xlsxwriter
workbook = xlsxwriter.Workbook("first-excel.xlsx")
sheet = workbook.add_worksheet("My First Worksheet")
sheet.write("C1", "Hello!")
sheet.write(2, 5, "Hello Python") # write in F3 ..row, col it starts from 0
workbook.close() # this is necessary for saving the filedata--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[16], line 1 ----> 1 data NameError: name 'data' is not defined
data = read_csv("indexdata.csv", fields=fields)import xlsxwriter
def write_tabular(data, filename):
workbook = xlsxwriter.Workbook(filename)
sheet = workbook.add_worksheet("data")
# to write headers
for cnum, colvalue in enumerate(data[0].keys()):
sheet.write(0, cnum, colvalue)
# to write data
for rnum, row in enumerate(data, start=1):
for cnum, value in enumerate(row.values()):
sheet.write(rnum, cnum, value)
workbook.close()
write_tabular(data, "indexdata.xlsx")16.2.2 Adding format
import xlsxwriter
workbook = xlsxwriter.Workbook("bold.xlsx")
s = workbook.add_worksheet()
bold = workbook.add_format({"bold":True})
s.write("A1", "Python", bold)
workbook.close()Workbook, BankAccount, ExcelSheet - Camelcase usually followed for class names
add_worksheet , save_data - snake case ..usually for methds and functions
problem Modify write_tablar function so that it write the columns names in bold format.
import xlsxwriter
def write_tabular(data, filename):
workbook = xlsxwriter.Workbook(filename)
sheet = workbook.add_worksheet("data")
# to write headers
format_ = workbook.add_format({"bold":True, "italic":True})
for cnum, colvalue in enumerate(data[0].keys()):
sheet.write(0, cnum, colvalue, format_)
# to write data
for rnum, row in enumerate(data, start=1):
for cnum, value in enumerate(row.values()):
sheet.write(rnum, cnum, value)
workbook.close()
write_tabular(data, "index-fomatted.xlsx")16.2.3 formula?
import xlsxwriter
import string
def write_tabular(data, filename, sumcolumn):
workbook = xlsxwriter.Workbook(filename)
sheet = workbook.add_worksheet("data")
# to write headers
format_ = workbook.add_format({"bold":True, "italic":True})
for cnum, colvalue in enumerate(data[0].keys()):
sheet.write(0, cnum, colvalue, format_)
# to write data
for rnum, row in enumerate(data, start=1):
for cnum, value in enumerate(row.values()):
sheet.write(rnum, cnum, value)
rownumber = len(data) + 1
colnumber = list(data[0].keys()).index(sumcolumn)
rangechar = string.ascii_uppercase[colnumber]
sheet.write(rownumber, colnumber, f"=SUM({rangechar}2:{rangechar}{rownumber})")
workbook.close()
import string
string.ascii_uppercase[3]'D'
write_tabular(data, "indexdata-formula.xlsx", 'volume')16.2.4 add table
import xlsxwriter
import string
def write_tabular(data, filename, sumcolumn):
workbook = xlsxwriter.Workbook(filename)
sheet = workbook.add_worksheet("data")
# to write headers
format_ = workbook.add_format({"bold":True, "italic":True})
for cnum, colvalue in enumerate(data[0].keys()):
sheet.write(0, cnum, colvalue, format_)
# to write data
for rnum, row in enumerate(data, start=1):
for cnum, value in enumerate(row.values()):
sheet.write(rnum, cnum, value)
rownumber = len(data) + 1
colnumber = list(data[0].keys()).index(sumcolumn)
rangechar = string.ascii_uppercase[colnumber]
endrange = f"{rangechar}{rownumber}"
sheet.write(rownumber, colnumber, f"=SUM({rangechar}2:{endrange}")
sheet.add_table(f"A1:{rangechar}{rownumber-1}", {"columns":
[{"header":name} for name in data[0].keys()]
})
workbook.close()
write_tabular(data, "index-with-table.xlsx", "volume")16.3 Questions and recap
Iteration patters
- enumerate
- reversed
- zip
List comprehesions
[{'header':name} for name in data[0].key()]Dictionary comprehension
read files
write files (modes
- ‘w’ - write a file form fresh
- ‘a’ append to a file
classes
- data encapuslation
- mehods to manage or change data
Dictianry
- classification
csv module
- csv.reader which done parsing of csv without considering column names
- csv.Dictreader .. this considers the columns names
xlsxwriter
- referece document [https://xlsxwriter.readthedocs.io/index.html]
colnames = ['a','b','c','v']
values = [23, 24, 25, 25]
d = {k:v for k,v in zip(colnames, values)}
d{'a': 23, 'b': 24, 'c': 25, 'v': 25}
d['w']--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[46], line 1 ----> 1 d['w'] KeyError: 'w'
d.get("w", "default")'default'
d{'a': 23, 'b': 24, 'c': 25, 'v': 25}
d.setdefault("w", "default")'default'
d{'a': 23, 'b': 24, 'c': 25, 'v': 25, 'w': 'default'}
d.setdefault("w", "x")'default'
d{'a': 23, 'b': 24, 'c': 25, 'v': 25, 'w': 'default'}