-
-
Save 1328/f76cc44aed256f1b0cee55f3e3bc2925 to your computer and use it in GitHub Desktop.
notes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import csv | |
# Raise the csv module's per-field size cap so rows containing very large
# fields can be parsed instead of being rejected by the reader.
csv.field_size_limit(999999999)
def get_csv(filename):
    """Read a pipe-delimited file and return all of its rows.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one entry per row; each row is the list of string
        fields produced by ``csv.reader`` with ``'|'`` as the delimiter.
    """
    # Use the parameter itself: quoting it ('filename') would try to open a
    # file that is literally called "filename".
    # newline='' lets the csv module handle line endings itself.
    # Note: avoid built-in names (such as ``input``) for variables here.
    with open(filename, 'r', newline='') as file_handle:
        return list(csv.reader(file_handle, delimiter='|'))
def get_targets(filename):
    """Return the set of master keys found in ``filename``.

    The file is read through ``get_csv``; the first field of every row is
    taken as that row's master key.
    """
    master_keys = set()
    for record in get_csv(filename):
        master_keys.add(record[0])
    return master_keys
def get_keepers(filename):
    """Return the report keys whose filter column equals ``'M'``.

    Args:
        filename: Path of a pipe-delimited file, read through ``get_csv``.
            The first field of a row is its report key and the third field
            is the filter value.

    Returns:
        A set of the report keys (first field) of every row whose third
        field is exactly ``'M'``.
    """
    # The set is built once; the earlier version computed it twice and
    # printed it each time purely for debugging.
    # Rows with fewer than three fields (e.g. blank lines) cannot carry the
    # 'M' filter, so they are skipped rather than raising IndexError.
    return {
        row[0]
        for row in get_csv(filename)
        if len(row) > 2 and row[2] == 'M'
    }
def gather(keepers, data_file):
    """Group data points by report key, keeping only the requested keys.

    Args:
        keepers: Collection of report keys to keep; only membership
            (``key in keepers``) is used.
        data_file: Path of a pipe-delimited file, read through ``get_csv``,
            whose rows are ``report_key|data_point`` pairs.

    Returns:
        A ``defaultdict(list)`` mapping each kept report key to the list of
        its data points, in file order.

    Raises:
        ValueError: If a row does not contain exactly two fields (the row is
            unpacked into ``key, point``).
    """
    found = defaultdict(list)
    # The CSV helper in this file is get_csv; read_csv is not defined here
    # and calling it raised NameError.
    for key, point in get_csv(data_file):
        if key in keepers:
            found[key].append(point)
    return found
def save_csv(found):
    """Write ``found`` to ``outputCSV.txt`` as pipe-delimited rows.

    ``found`` maps each report key to a list of data points. Every entry
    becomes one output row shaped ``report_key|data|data|...``. Each row is
    also printed before it is written.
    """
    with open('outputCSV.txt', 'w', newline='') as out_handle:
        pipe_writer = csv.writer(out_handle, delimiter='|')
        # items() yields the key and its list together, which is exactly
        # what a row needs.
        for key, points in found.items():
            record = [key] + points
            print(record)
            pipe_writer.writerow(record)
def main():
    """Run the whole pipeline: select keys, gather their data, save it.

    Reads the master keys from ``masterkeyCSV.txt`` and the 'M'-filtered
    report keys from ``CSVwithFILTER.txt``, keeps the keys present in both,
    collects their data points from ``CSVwithDATAPOINT.txt`` and writes the
    result with ``save_csv``.
    """
    targets = get_targets('masterkeyCSV.txt')
    keepers = get_keepers('CSVwithFILTER.txt')
    # Set intersection: only keys that are both a master key and 'M'-filtered.
    wanted = targets & keepers
    found = gather(wanted, 'CSVwithDATAPOINT.txt')
    save_csv(found)


# Without this guard main() is defined but never called, so running the
# script would do nothing; importing the module still has no side effects.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment