import datetime | |
import gzip | |
import csv | |
from glob import glob | |
import ciso8601 | |
# Rows whose last-modified timestamp is later than this instant are counted as
# "recent". The window is 6 * 30 = 180 days back from the moment this module
# is loaded. The value is a timezone-aware UTC datetime; it is built with
# now(timezone.utc) because datetime.utcnow() is deprecated (Python 3.12+)
# and returns a naive value that needs a separate tzinfo fix-up.
cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
    days=6 * 30
)
def count(fn):
    """Count the rows of a gzipped CSV file and how many of them are recent.

    The fourth column (index 3) of every row is parsed as an ISO 8601
    timestamp with ``ciso8601.parse_datetime``. A row is "recent" when that
    timestamp is strictly later than the module-level ``cutoff``.

    Args:
        fn: Path of the gzip-compressed CSV file to read.

    Returns:
        A ``(total, recent)`` tuple: the number of rows read, and the number
        of those rows whose timestamp is newer than ``cutoff``.

    Raises:
        IndexError: If a row has fewer than four columns.
    """
    total = recent = 0
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; otherwise quoted fields containing line breaks
    # can be mis-parsed.
    with gzip.open(fn, 'rt', newline='') as f:
        for row in csv.reader(f):
            # NOTE(review): presumably the timestamps carry a UTC offset; a
            # naive datetime cannot be compared with the aware `cutoff` --
            # confirm against the data.
            if ciso8601.parse_datetime(row[3]) > cutoff:
                recent += 1
            total += 1
    return total, recent
def run():
    """Tally rows across the ``*.csv.gz`` files in the current directory.

    Every matching file whose name is exactly 39 characters long is passed to
    ``count``; its 1-based position in the glob listing and its name are
    printed as progress. Afterwards three lines are printed: the total number
    of rows, the number of recent rows, and the recent share as a percentage
    with one decimal place (or ``n/a`` when no rows were counted).

    Returns:
        None. All results are written to standard output.
    """
    total = recent = 0
    for i, fn in enumerate(glob('*.csv.gz')):
        # NOTE(review): only 39-character file names are processed --
        # presumably this singles out the hash-named data files; confirm.
        if len(fn) != 39:
            continue
        print(i + 1, fn)
        t, c = count(fn)
        total += t
        recent += c
    print(total)
    print(recent)
    if total:
        print('{:.1f}%'.format(100 * recent / total))
    else:
        # Nothing was counted (no matching files, or only empty ones); a
        # percentage would be a division by zero.
        print('n/a')
# Script entry point: the scan starts as soon as this module is executed.
# NOTE(review): there is no `if __name__ == '__main__':` guard, so importing
# this module also triggers the scan -- confirm that is intended.
run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment