Instantly share code, notes, and snippets.

Embed
What would you like to do?
import datetime
import gzip
import csv
from glob import glob
import concurrent.futures
# Rows whose timestamp is later than this instant are counted as "recent"
# (the window is 6 * 30 days back from the moment the script starts).
# The timestamps compared against it are parsed from strings ending in a
# literal 'Z' (conventionally UTC) into naive datetimes, so "now" is taken
# from the UTC clock rather than local time, and tzinfo is dropped so the
# value stays comparable with those naive datetimes.
cutoff = (
    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    - datetime.timedelta(days=6 * 30)
)
def count(fn, i):
    """Count the rows of one gzipped CSV file and how many of them are recent.

    Prints ``i`` and ``fn`` before reading. The column at index 3 of every
    row is parsed as a ``%Y-%m-%dT%H:%M:%S.%fZ`` timestamp and compared with
    the module-level ``cutoff``.

    Args:
        fn: Path of the gzip-compressed CSV file to read.
        i: Value printed alongside the file name (the caller passes the
            file's position in its listing).

    Returns:
        A ``(rows_seen, rows_recent)`` tuple: the number of rows read and the
        number of those whose timestamp is later than ``cutoff``.
    """
    print(i, fn)
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    parse_timestamp = datetime.datetime.strptime
    rows_seen = 0
    rows_recent = 0
    with gzip.open(fn, 'rt') as handle:
        # enumerate(start=1) leaves rows_seen equal to the number of rows
        # read; it stays 0 when the file has no rows at all.
        for rows_seen, row in enumerate(csv.reader(handle), start=1):
            if parse_timestamp(row[3], timestamp_format) > cutoff:
                rows_recent += 1
    return rows_seen, rows_recent
def run():
    """Scan the gzipped CSV files in the current directory and print totals.

    Every ``*.csv.gz`` file whose name is exactly 39 characters long is
    handed to ``count`` on a thread pool. Once all results are in, three
    lines are printed: the total number of rows, the number of recent rows,
    and the recent share as a percentage with one decimal.

    When no rows were read at all (no matching files, or only empty ones)
    the percentage is reported as ``0.0%`` instead of raising
    ``ZeroDivisionError``.
    """
    total = recent = 0
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(count, fn, i)
            for i, fn in enumerate(glob('*.csv.gz'))
            # Only file names of exactly 39 characters are processed.
            if len(fn) == 39
        ]
        # Results are summed in completion order; addition makes the order
        # irrelevant. An exception raised in a worker propagates from
        # future.result().
        for future in concurrent.futures.as_completed(futures):
            t, c = future.result()
            total += t
            recent += c
    print(total)
    print(recent)
    # Guard the division: total is 0 when nothing was read.
    percentage = 100 * recent / total if total else 0.0
    print('{:.1f}%'.format(percentage))
# Script entry point: the scan starts as soon as this module is executed
# (there is no __main__ guard, so importing the module runs it as well).
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment