Skip to content

Instantly share code, notes, and snippets.

@lincank
Created October 23, 2012 15:51
Show Gist options
  • Save lincank/3939597 to your computer and use it in GitHub Desktop.
Save lincank/3939597 to your computer and use it in GitHub Desktop.
calculate average date difference in xls file with python
import datetime
import xlrd
# change the input path and output path
input_path = "/tmp/date.xls"
terminated_path = "/tmp/terminated.xlsx"
output_path = "/tmp/output.csv"
terminated_str = "terminate after IC"
def get_date(dateStr):
"""
new a date object from string like "09/12/2012"
"""
month,day,year = dateStr.split("/")
return datetime.date(int(year), int(month), int(day))
def get_date_diff_average(values, exclude_list):
"""
extract all dates from one row and calculate average date difference
"""
# discard those terminated items
if values[0] in exclude_list:
return terminated_str
length = len(values) - 1
date_list = []
for num in range(length):
cel = values[length - num]
if cel:
date_list.append(get_date(cel))
# get date diff with today if only one date available
if len(date_list) < 2:
return abs((datetime.date.today() - date_list[0]).days)
else:
date_diff = abs((date_list[0] - date_list[-1]).days)
return float(date_diff) / (len(date_list) - 1)
# prepare list for terminated items
wbx = xlrd.open_workbook(terminated_path)
shx = wbx.sheet_by_index(0)
exclude_list = []
for x in range(shx.nrows):
exclude_list.append(shx.row_values(x)[0])
# read input xls file
wb = xlrd.open_workbook(input_path)
sh = wb.sheet_by_index(0)
# open file, change path if needed
# be careful with space in the path
f = open(output_path, "w")
for i in range(sh.nrows):
row = sh.row_values(i)
avr = get_date_diff_average(row, exclude_list)
f.write("%s, , %s\n" % (row[0], avr))
f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment