Skip to content

Instantly share code, notes, and snippets.

@peterbe
Created May 15, 2018 19:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterbe/a732bf502892d21200ef06f99e8a6751 to your computer and use it in GitHub Desktop.
Save peterbe/a732bf502892d21200ef06f99e8a6751 to your computer and use it in GitHub Desktop.
import datetime
import gzip
import csv
from glob import glob
import ciso8601
# Rows whose timestamp is later than this moment are counted as "recent":
# 6 * 30 days before the time the script starts, as a timezone-aware UTC
# datetime.
# datetime.now(timezone.utc) builds the aware value directly; utcnow()
# returns a naive datetime and is deprecated since Python 3.12.
cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
    days=6 * 30
)
def count(fn):
    """Count the rows of a gzipped CSV file and how many of them are recent.

    The fourth column (index 3) of every row is parsed with
    ``ciso8601.parse_datetime`` and compared against the module-level
    ``cutoff``.

    NOTE(review): ``cutoff`` is timezone-aware, so the comparison assumes the
    parsed timestamps are aware too; a naive value would raise ``TypeError``.
    Confirm against the actual data.

    Args:
        fn: Path of the gzip-compressed CSV file to read.

    Returns:
        A ``(total, recent)`` tuple: the number of non-empty rows, and the
        number of those rows whose timestamp is later than ``cutoff``.

    Raises:
        IndexError: If a non-empty row has fewer than four columns.
    """
    # Named ``recent`` rather than ``count`` so the local does not shadow
    # this function's own name.
    total = recent = 0
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with gzip.open(fn, 'rt', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for a blank line; there is no
                # timestamp to inspect, so skip it rather than fail on
                # row[3].
                continue
            if ciso8601.parse_datetime(row[3]) > cutoff:
                recent += 1
            total += 1
    return total, recent
def run():
    """Print overall and recent row counts for the local ``*.csv.gz`` files.

    Every file in the current directory matching ``*.csv.gz`` whose name is
    exactly 39 characters long is passed to ``count()``. The grand total of
    rows, the number of recent rows, and the recent share as a percentage
    are then printed, one per line.
    """
    total = recent = 0
    for i, fn in enumerate(glob('*.csv.gz')):
        # NOTE(review): the original indentation of this loop was lost; the
        # length check is treated here as a filter for the whole loop body
        # (only 39-character file names are processed) -- confirm.
        if len(fn) != 39:
            continue
        # Progress output: 1-based position in the glob listing + file name.
        print(i + 1, fn)
        t, c = count(fn)
        total += t
        recent += c

    print(total)
    print(recent)
    if total:
        print('{:.1f}%'.format(100 * recent / total))
    else:
        # No rows were counted (no matching files, or only empty ones), so
        # a percentage is undefined; avoid the ZeroDivisionError.
        print('n/a')
# Produce the report only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment