Skip to content

Instantly share code, notes, and snippets.

@vicenterocha
Last active October 27, 2021 17:39
Show Gist options
  • Save vicenterocha/d90fab37114bfa0dcc4320a310df5b3f to your computer and use it in GitHub Desktop.
Save vicenterocha/d90fab37114bfa0dcc4320a310df5b3f to your computer and use it in GitHub Desktop.
import csv
from collections import defaultdict
# Default locations used when the file is run as a script.
INPUT_PATH = '/Users/vicente.rocha/Downloads/david.txt'
OUTPUT_PATH = '/Users/vicente.rocha/Downloads/david_fix.txt'
# Maximum number of rows kept for each distinct group value.
MAX_PER_GROUP = 20


def cap_rows_per_group(rows, max_per_group=MAX_PER_GROUP):
    """Keep at most *max_per_group* rows for each distinct group value.

    Args:
        rows: Iterable of parsed CSV rows (sequences of strings). Column 0
            is the index and column 1 is the group value; any further
            columns are dropped.
        max_per_group: Number of rows retained per group value. Rows seen
            after a group has reached this count are discarded.

    Returns:
        A list of ``(index, value)`` tuples in input order.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    total_values_per_group = defaultdict(int)
    lines = []
    for row in rows:
        # csv.reader yields [] for a blank line; that is not a data row.
        if not row:
            continue
        index, value = row[0], row[1]
        total_values_per_group[value] += 1
        if total_values_per_group[value] <= max_per_group:
            lines.append((index, value))
    return lines


def find_incomplete_groups(lines, expected_size=MAX_PER_GROUP):
    """Return the groups whose retained row count differs from *expected_size*.

    Args:
        lines: Iterable of ``(index, value)`` tuples.
        expected_size: Row count every group is expected to have.

    Returns:
        A dict mapping group value to its actual row count, containing only
        the groups whose count is not *expected_size*.
    """
    validator = defaultdict(int)
    for _, value in lines:
        # Count per group value, not per (index, value) pair: keying on the
        # whole tuple would make every distinct row its own "group" of 1.
        validator[value] += 1
    return {val: size for val, size in validator.items() if size != expected_size}


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, max_per_group=MAX_PER_GROUP):
    """Read the input CSV, cap each group, report short groups, write the result.

    Args:
        input_path: Comma-delimited file to read.
        output_path: File that receives the retained ``index,value`` rows.
        max_per_group: Maximum rows kept per group value.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(input_path, newline='') as f:
        lines = cap_rows_per_group(csv.reader(f, delimiter=','), max_per_group)

    # verify
    for val, size in find_incomplete_groups(lines, max_per_group).items():
        print(f"{val} has size {size}")

    # write to disk
    with open(output_path, 'w', newline='') as f:
        # csv.writer re-quotes fields containing the delimiter or quotes, so
        # values the reader had to unquote are written back correctly. Rows
        # end in '\n'.
        csv.writer(f, delimiter=',', lineterminator='\n').writerows(lines)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment