Skip to content

Instantly share code, notes, and snippets.

@jBenes
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jBenes/76d1746c7a2642d847af to your computer and use it in GitHub Desktop.
Save jBenes/76d1746c7a2642d847af to your computer and use it in GitHub Desktop.
import csv
import argparse
from collections import defaultdict
def load_routes(routes_file=None):
    """Read a tab-separated routes file into a source -> destinations mapping.

    Each non-empty line must hold at least two tab-separated fields; the
    first is used as the source, the second as the destination, and any
    further fields are ignored. Carriage returns and newlines are removed
    before splitting.

    Args:
        routes_file: path of the routes file. When falsy (``None`` or an
            empty string) nothing is read.

    Returns:
        A ``defaultdict(set)`` mapping each source to the set of its
        destinations (empty when no file was given).

    Raises:
        ValueError: if a non-empty line has fewer than two fields.
    """
    routes = defaultdict(set)
    if not routes_file:
        return routes

    with open(routes_file) as handle:
        for raw_line in handle:
            line = raw_line.replace("\r", "").replace("\n", "")
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no route;
                # skipping them avoids an unpacking error below.
                continue
            src, dst = line.split("\t")[:2]
            routes[src].add(dst)
    return routes
def replace_row(prepared_rows, src, dst, row_id, reserved_rows, count_replaced):
    """Write a route into the nearest non-reserved row at or before ``row_id``.

    Walks backwards from index ``row_id`` through ``prepared_rows`` until it
    reaches an index that is not in ``reserved_rows``; that row is mutated in
    place: column 143 receives ``src`` and column 146 receives ``dst``.

    Args:
        prepared_rows: list of mutable rows; a row that gets replaced must
            have at least 147 columns.
        src: value stored in column 143 of the chosen row.
        dst: value stored in column 146 of the chosen row.
        row_id: 0-based index at which the backwards search starts.
        reserved_rows: container of row indices that must not be overwritten.
        count_replaced: running total of replacements made so far.

    Returns:
        A ``(row_id, count_replaced)`` tuple. ``row_id`` is one below the last
        index examined, i.e. where the next call should start. When every
        index down to 0 is reserved (or ``row_id`` is already negative) no
        row is touched, the returned ``row_id`` is negative and
        ``count_replaced`` is returned unchanged.
    """
    # Stop at index 0: a negative index would silently wrap around to the end
    # of the list and clobber rows that were already handled.
    while row_id >= 0:
        candidate = row_id
        row_id -= 1
        if candidate in reserved_rows:
            continue
        target = prepared_rows[candidate]
        target[143] = src
        target[146] = dst
        return row_id, count_replaced + 1
    return row_id, count_replaced
def _open_csv(path, mode):
    """Open ``path`` in the file mode the csv module needs on this interpreter.

    Python 2's csv module expects binary files, while Python 3's expects text
    files opened with ``newline=''``. ``mode`` is ``'r'`` or ``'w'``.
    """
    import sys  # local import so this block does not rely on file-level imports

    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def process_file(source_file, destination_file, routes_file=None):
    """Rewrite a '^'-delimited CSV file, injecting routes that are missing.

    Every row read from ``source_file`` gets column 1 set to its 1-based row
    number, column 152 set to ``2`` and column 189 set to ``'OW'``. A row
    whose columns 143/146 match a (source, destination) pair loaded from
    ``routes_file`` is marked as reserved and that pair is consumed. Each
    pair that is still unmatched afterwards is written into columns 143/146
    of a non-reserved row, working backwards from the last row, via
    ``replace_row``. All rows are then written to ``destination_file``.

    The whole source file is held in memory. A start message and a final
    ``done <count>`` message (number of replaced rows) are printed.

    Args:
        source_file: path of the input CSV.
        destination_file: path of the output CSV (created or overwritten).
        routes_file: optional path of the routes file for ``load_routes``.

    Raises:
        IndexError: if a source row has fewer than 190 columns.
    """
    print('process_file: start source_file %s, destination_file %s'
          % (source_file, destination_file))
    routes = load_routes(routes_file)

    prepared_rows = []
    reserved_rows = set()
    with _open_csv(source_file, 'r') as source:
        reader = csv.reader(source, delimiter='^')
        for row_number, row in enumerate(reader, 1):
            row[1] = row_number
            row[152] = 2
            row[189] = 'OW'
            src, dst = row[143], row[146]
            # `src in routes` first: routes is a defaultdict and a bare
            # routes[src] lookup would create an empty entry.
            if src in routes and dst in routes[src]:
                # Store the 0-based list index, because replace_row tests
                # the same index it uses to address prepared_rows.
                reserved_rows.add(len(prepared_rows))
                routes[src].remove(dst)
            prepared_rows.append(row)

    # Routes not present in the source are written over non-reserved rows,
    # starting from the last row and moving towards the first.
    row_id = len(prepared_rows) - 1
    count_replaced = 0
    pending = [(src, dst) for src, dsts in routes.items() for dst in dsts]
    for src, dst in pending:
        if row_id < 0:
            # No rows left to overwrite; a negative index would wrap around.
            break
        row_id, count_replaced = replace_row(
            prepared_rows, src, dst, row_id, reserved_rows, count_replaced)

    with _open_csv(destination_file, 'w') as destination:
        csv.writer(destination, delimiter='^').writerows(prepared_rows)
    print('done %s' % count_replaced)
if __name__ == "__main__":
    # Command-line entry point: parse the file paths and run process_file.
    parser = argparse.ArgumentParser()
    # Source and destination are always opened by process_file, so they are
    # required; omitting them now gives a usage error instead of a failure
    # inside open().
    parser.add_argument("--source_file", help="csv file to process", type=str, required=True)
    parser.add_argument("--destination_file", help="where to store this shit", type=str, required=True)
    # The routes file stays optional: without it no routes are injected.
    parser.add_argument("--routes_file", help="prior routes", type=str)
    args = parser.parse_args()
    process_file(args.source_file, args.destination_file, args.routes_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment