Skip to content

Instantly share code, notes, and snippets.

@d136o
Created October 5, 2015 20:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d136o/ef5dc06976ebb73e98c3 to your computer and use it in GitHub Desktop.
Save d136o/ef5dc06976ebb73e98c3 to your computer and use it in GitHub Desktop.
Tally up code counts per day
import os
import pandas
def main():
    """Tally how many times each code occurs per calendar day.

    Walks ``./replay-correction`` recursively and reads every file as
    fixed-width records, one record per line.  In each record, bytes 44-55
    hold a timestamp formatted as ``%Y%m%d%H%M`` and bytes 56-59 hold a
    4-character code.  Lines too short to contain both fields (for example
    blank or truncated lines) are skipped.

    The per-day, per-code counts are written to ``./icon_code_count.csv``.

    Returns:
        A tuple ``(dates, codes, data, daily_counts)`` where ``dates`` and
        ``codes`` are ``pandas.Series`` of the raw field strings, ``data``
        is a ``DataFrame`` with columns ``date_str``, ``code`` and
        ``date_dt``, and ``daily_counts`` is a ``DataFrame`` with a single
        ``counts`` column indexed by ``(date_dt, code)``.

    Raises:
        ValueError: if a timestamp field does not match ``%Y%m%d%H%M``
            (raised by ``pandas.to_datetime``).
    """
    data_directory = './replay-correction'
    output_fname = './icon_code_count.csv'

    # Byte offsets of the fixed-width fields within a record.
    date_start, date_end = 44, 56
    code_start, code_end = 56, 60

    dates = []
    codes = []
    for base, _dirs, files in os.walk(data_directory):
        for filename in files:
            # Read as bytes so the field offsets are exact byte positions
            # regardless of what else the record contains.
            with open(os.path.join(base, filename), 'rb') as f:
                for line in f:
                    if len(line) < code_end:
                        # Not enough bytes for both fields; a partial date
                        # string would make to_datetime fail for the whole
                        # run, so skip the record.
                        continue
                    # latin-1 maps every byte to exactly one character, so
                    # decoding can never fail and yields text (not bytes)
                    # for date parsing and CSV output.
                    dates.append(line[date_start:date_end].decode('latin-1'))
                    codes.append(line[code_start:code_end].decode('latin-1'))

    dates = pandas.Series(dates)
    codes = pandas.Series(codes)
    data = pandas.DataFrame({'date_str': dates, 'code': codes})
    data['date_dt'] = pandas.to_datetime(data['date_str'], format='%Y%m%d%H%M')

    # Truncate each timestamp to its calendar day for grouping.
    day = data['date_dt'].dt.to_period('D')

    # Count rows per (day, code).  Selecting one column explicitly avoids
    # dropping a leftover column by position afterwards.
    daily_counts = (
        data.groupby([day, data['code']])['date_str']
        .count()
        .to_frame('counts')
    )
    daily_counts.to_csv(output_fname)
    return (dates, codes, data, daily_counts)
# Run the tally only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment