Skip to content

Instantly share code, notes, and snippets.

@peterbe
Created May 15, 2018 19:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterbe/297c5553773902fea03b1667cf056275 to your computer and use it in GitHub Desktop.
Save peterbe/297c5553773902fea03b1667cf056275 to your computer and use it in GitHub Desktop.
import datetime
import gzip
import csv
from glob import glob
# Rows whose last-modified timestamp is newer than this are counted as
# "recent". The window is 6 * 30 days (roughly six months).
#
# The timestamps parsed in count() end in a literal 'Z' (UTC) and are parsed
# into naive datetimes, so the cutoff must also be a naive datetime expressed
# in UTC. Using local time here would skew the window by the machine's UTC
# offset.
cutoff = (
    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    - datetime.timedelta(days=6 * 30)
)
def count(fn, since=None):
    """Count the rows of a gzipped CSV file and how many are recent.

    Each row's fourth column (index 3) is parsed as a timestamp in the
    format ``%Y-%m-%dT%H:%M:%S.%fZ``. A row is "recent" when that timestamp
    is strictly later than the threshold.

    Args:
        fn: Path to a gzip-compressed CSV file.
        since: Naive datetime used as the threshold. Defaults to the
            module-level ``cutoff`` when omitted.

    Returns:
        A ``(total, recent)`` tuple: the number of rows read and the number
        of those rows newer than the threshold.

    Raises:
        ValueError: If a timestamp does not match the expected format.
        IndexError: If a non-empty row has fewer than four columns.
    """
    threshold = cutoff if since is None else since
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    recent = total = 0
    with gzip.open(fn, 'rt') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; they carry no record.
                continue
            lastmodified = datetime.datetime.strptime(row[3], timestamp_format)
            if lastmodified > threshold:
                recent += 1
            total += 1
    return total, recent
def run():
    """Tally rows across the ``*.csv.gz`` files in the current directory.

    Only files whose name is exactly 39 characters long are processed; each
    processed file is announced with its 1-based position in the glob
    result. Finally prints the total row count, the recent row count, and
    the recent share as a percentage with one decimal.

    NOTE(review): the 39-character check presumably selects one specific
    file-naming scheme -- confirm. The source's indentation was lost, so
    this assumes all per-file work was conditional on that check.
    """
    total = recent = 0
    for position, fn in enumerate(glob('*.csv.gz'), start=1):
        if len(fn) != 39:
            continue
        print(position, fn)
        t, c = count(fn)
        total += t
        recent += c
    print(total)
    print(recent)
    # Guard against ZeroDivisionError when no file matched or all were empty.
    percentage = 100 * recent / total if total else 0.0
    print('{:.1f}%'.format(percentage))
# Script entry point: the tally runs as soon as this file is executed.
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment