Skip to content

Instantly share code, notes, and snippets.

@public
Created February 17, 2014 09:40
Show Gist options
  • Save public/9047618 to your computer and use it in GitHub Desktop.
Save public/9047618 to your computer and use it in GitHub Desktop.
import sys
import time
import hotshot
import hotshot.stats
import ipdb
import openpyxl
def get_process_rss():
    """Return the current process's memory size in bytes, or 0 if unknown.

    The value is taken from the ``VmSize:`` line of ``/proc/self/status``
    and multiplied by 1024 (the kernel reports this field in kB).

    NOTE(review): despite the function name, this reads ``VmSize`` (virtual
    size), not ``VmRSS`` (resident set size). The field is kept as-is so the
    figures printed by the benchmark stay comparable with earlier runs.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and stream the file instead of loading every line up front.
    with open("/proc/self/status") as status_file:
        for line in status_file:
            if line.startswith("VmSize:"):
                return int(line.split()[1]) * 1024
    # No VmSize line was present.
    return 0
def highest_row(sheet):
    """Return whatever ``sheet.get_highest_row()`` reports for *sheet*."""
    row_count = sheet.get_highest_row()
    return row_count
def highest_column(sheet):
    """Return the largest column index among the sheet's stored cells, minus 1.

    Each cell's ``column`` letter is converted with
    ``openpyxl.cell.column_index_from_string``. If a ``ValueError`` is raised
    while computing the maximum (as ``max`` does for a sheet with no stored
    cells), 1 is returned instead.
    """
    column_indices = (
        openpyxl.cell.column_index_from_string(stored.column)
        for stored in sheet._cells.itervalues()
    )
    try:
        rightmost = max(column_indices)
    except ValueError:
        return 1
    return rightmost - 1
def main(args):
start = time.time()
start_mem = get_process_rss()
print "start", start, start_mem
wb = openpyxl.load_workbook(args[1])
opened = time.time()
opened_mem = get_process_rss()
print "open", opened-start, opened_mem-start_mem
# We are going to go and count the numberof cells with values,
# the number of cells within our bounding box, and the number of
# cell.value accesses we do.
values = 0
cells = 0
accesses = 0
for sheet in wb.worksheets:
rows = highest_row(sheet)
columns = highest_column(sheet)
for r in xrange(rows):
blanks = 0
for c in xrange(columns):
cell = sheet.cell(row=r, column=c)
if cell.value is not None:
values += 1
else:
blanks += 1
cell.offset(row=1, column=1).value
accesses += 2
cells += 1
if blanks == c+1:
break
end = time.time()
total = end-start
done_mem = get_process_rss()
print "read", total, cells, values, accesses, done_mem-start_mem
cells = float(cells)
accesses = float(accesses)
values = float(values)
print (total/accesses)*1000, "ms per cell access"
print (done_mem-start_mem) / cells, "bytes per cell"
print (done_mem-start_mem) / values, "bytes per value"
# Flip to True to run main() under the hotshot profiler, write the profile
# to disk, load and sort the stats, and drop into ipdb to inspect them.
PROFILE_RUN = False

if not PROFILE_RUN:
    main(sys.argv)
else:
    profile_path = "speed.prof"
    prof = hotshot.Profile(profile_path)
    prof.runcall(main, sys.argv)
    prof.close()
    # Reload the recorded profile and order it by time, then call count.
    stats = hotshot.stats.load(profile_path)
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    ipdb.set_trace()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment