Skip to content

Instantly share code, notes, and snippets.

@phaustin
Last active August 29, 2015 14:15
Show Gist options
  • Save phaustin/7e080a90934546457e41 to your computer and use it in GitHub Desktop.
Save phaustin/7e080a90934546457e41 to your computer and use it in GitHub Desktop.
openpyxl to pandas with bad values
# -*- coding: utf-8 -*-
from __future__ import print_function
from builtins import next
from openpyxl import load_workbook
import glob,os
import pandas as pd
import numpy as np
import logging
logging.basicConfig(level=logging.DEBUG)
home=os.environ['HOME']
the_file=glob.glob('{}/Dropbox/e340_2014_spring/Exams/Mid_term_1/grades/*cl*201.xlsx'.format(home))[0]
wb=load_workbook(the_file,data_only=True,use_iterators=True)
combine,=wb.get_sheet_names()
sheet=wb[combine]
row_iter=sheet.iter_rows()
headers = [c.value for c in next(row_iter) if c.value]
headers=[item.replace(' ','_').lower() for item in headers]
grades = np.empty(1, dtype=[('id','int_'),('last','unicode_'),('first','unicode_'),
('pref','unicode_'),('group_num','int_'),
('group_mark','int_'),('ind_mark','int_')])
df=pd.DataFrame(grades)
count=0
for count,row in enumerate(row_iter):
out=[item.value for item in row]
out[0]=int(out[0])
if len(out[3]) == 0:
out[3]='nan'
for cell,number in enumerate(out[-3:]):
try:
out[-3 + cell]=int(number)
except (ValueError,TypeError):
out[-3 + cell]=int(-999)
df.loc[count]=out
print(df)
with pd.HDFStore('store.h5','w') as store:
store.put('mid_term1',df,format='table')
with pd.HDFStore('store.h5','r') as store:
new_df=store['mid_term1']
* convert a row to a dictionary and subset
the_file='classlists_2014W_UBC_EOSC_340_201-1.xlsx'
wb=load_workbook(the_file,data_only=True,use_iterators=True)
names=wb.get_sheet_names()
sheet=wb['Combined']
row_iter=sheet.iter_rows()
colnames=next(row_iter)
colnames=[c.value for c in colnames]
the_list=[]
for row in row_iter:
values=[c.value for c in row]
the_dict=dict(zip(colnames,values))
newnames={'Student Number':'studentid','Given Name':'firstname','Surname':'lastname','Preferred Name':'preferred_name'}
prune_keys=newnames.keys()
sub_dict={newnames[k]: the_dict[k] for k in prune_keys}
the_list.append(sub_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment