Skip to content

Instantly share code, notes, and snippets.

@foolip
Last active May 24, 2019 09:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save foolip/d18bd25bd2547c225b4a677dbda31e4b to your computer and use it in GitHub Desktop.
Save foolip/d18bd25bd2547c225b4a677dbda31e4b to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import csv
import math
import random
def log_rank(rank):
    """Return the order of magnitude of *rank*, i.e. ``int(log10(rank))``.

    Ranks 1-9 map to 0, 10-99 map to 1, 100-999 map to 2, and so on.

    Raises:
        ValueError: if *rank* is zero or negative (raised by ``math.log10``).
    """
    return int(math.log10(rank))


def group_rows_by_logrank(rows):
    """Group rows by the order of magnitude of their ``GlobalRank`` value.

    Args:
        rows: iterable of mappings that each have a ``'GlobalRank'`` entry
            convertible with ``int()`` (e.g. rows from ``csv.DictReader``).

    Returns:
        A dict mapping each logrank to the list of rows in that bucket.
        Buckets appear in the order their first row was encountered, and
        rows keep their input order within a bucket.
    """
    rows_by_logrank = {}
    for row in rows:
        rank = int(row['GlobalRank'])
        rows_by_logrank.setdefault(log_rank(rank), []).append(row)
    return rows_by_logrank


def print_samples(rows_by_logrank, sample_size=10):
    """Print a random sample of domains from every sufficiently large bucket.

    For each bucket a ``# Logrank N`` heading is printed, followed by one
    ``## domain (rank)`` line per sampled row.

    Args:
        rows_by_logrank: dict as returned by ``group_rows_by_logrank``; each
            row must have ``'Domain'`` and ``'GlobalRank'`` entries.
        sample_size: number of rows to draw, without replacement, per bucket.
    """
    for logrank, rows in rows_by_logrank.items():
        # random.sample raises ValueError when asked for more items than the
        # population holds, so buckets that are too small are skipped.
        if len(rows) < sample_size:
            continue
        subset = random.sample(rows, sample_size)
        print(f'# Logrank {logrank}')
        for row in subset:
            domain = row['Domain']
            rank = row['GlobalRank']
            print(f'## {domain} ({rank})')


def main(path='majestic_million.csv', sample_size=10):
    """Read the CSV at *path* and print random domains per rank magnitude.

    The file must have a header row containing ``GlobalRank`` and ``Domain``
    columns.

    Args:
        path: CSV file to read.
        sample_size: number of domains to print per logrank bucket.
    """
    # newline='' is what the csv module expects from its file objects; the
    # explicit encoding keeps decoding independent of the platform locale.
    with open(path, 'r', newline='', encoding='utf-8') as csvfile:
        rows_by_logrank = group_rows_by_logrank(csv.DictReader(csvfile))
    print_samples(rows_by_logrank, sample_size)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment