Skip to content

Instantly share code, notes, and snippets.

@1328
Last active February 5, 2019 16:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 1328/f76cc44aed256f1b0cee55f3e3bc2925 to your computer and use it in GitHub Desktop.
Save 1328/f76cc44aed256f1b0cee55f3e3bc2925 to your computer and use it in GitHub Desktop.
notes.py
from collections import defaultdict
import csv
# Raise the csv parser's maximum allowed field size so that very large
# fields are parsed instead of raising csv.Error.
csv.field_size_limit(999999999)
def get_csv(filename):
    """Read a pipe-delimited file and return all of its rows.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one entry per row; each row is a list of column strings.
    """
    # Open the path the caller passed in (not a literal 'filename' string).
    # newline='' lets the csv module do its own line-ending handling.
    # Note: avoid builtin names such as `input` for the file handle.
    with open(filename, 'r', newline='') as file_handle:
        return list(csv.reader(file_handle, delimiter='|'))
def get_targets(filename):
    """Return the set of master keys stored in the first column of a file.

    Args:
        filename: Path of the pipe-delimited file to read via get_csv.

    Returns:
        A set containing the first column of every non-blank row.
    """
    # csv.reader yields [] for a blank line; skip those so row[0] cannot
    # raise IndexError.
    return {row[0] for row in get_csv(filename) if row}
def get_keepers(filename):
    """Return the report keys whose filter column is 'M'.

    Args:
        filename: Path of the pipe-delimited file to read via get_csv.

    Returns:
        A set with the first column of every row whose third column
        equals 'M'.
    """
    # A row with fewer than three columns has no filter value, so it can
    # never be a keeper; the length check also avoids IndexError on blank
    # or truncated rows.
    return {
        row[0]
        for row in get_csv(filename)
        if len(row) > 2 and row[2] == 'M'
    }
def gather(keepers, data_file):
    """Collect the data points belonging to the wanted report keys.

    Args:
        keepers: Collection of report keys to keep (a set gives O(1)
            membership tests).
        data_file: Path of a pipe-delimited file whose rows are
            ``report_key|data_point``.

    Returns:
        A defaultdict(list) mapping each kept report_key to the list of its
        data points, in file order.

    Raises:
        ValueError: If a non-blank row does not have exactly two columns.
    """
    found = defaultdict(list)
    # Reuse the shared reader helper defined in this file (get_csv).
    for row in get_csv(data_file):
        if not row:
            # Blank line in the file; nothing to unpack.
            continue
        key, point = row
        if key in keepers:
            found[key].append(point)
    return found
def save_csv(found, filename='outputCSV.txt'):
    """Write the gathered data points to a pipe-delimited file.

    Each output row has the form ``report_key|data|data|...``.

    Args:
        found: Mapping of report_key to an iterable of data points.
        filename: Path of the file to write; defaults to 'outputCSV.txt'.
    """
    # newline='' stops the csv writer's own line terminator from being
    # translated a second time by the text layer.
    with open(filename, 'w', newline='') as output:
        writer = csv.writer(output, delimiter='|')
        # items() yields both the key and its data points, which together
        # make up one output row.
        for report_key, data_points in found.items():
            writer.writerow([report_key, *data_points])
def main():
    """Run the whole pipeline: pick the report keys, gather, and save.

    Reads the master keys and the 'M'-filtered keys, keeps only the keys
    present in both, collects their data points and writes the result.
    """
    master_keys = get_targets('masterkeyCSV.txt')
    filtered_keys = get_keepers('CSVwithFILTER.txt')
    # Only keys that appear in both sets are of interest.
    wanted = master_keys.intersection(filtered_keys)
    save_csv(gather(wanted, 'CSVwithDATAPOINT.txt'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment