Skip to content

Instantly share code, notes, and snippets.

@tdunning
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tdunning/e8f0c9ca213a3ff055cc to your computer and use it in GitHub Desktop.
Save tdunning/e8f0c9ca213a3ff055cc to your computer and use it in GitHub Desktop.
import fileinput
from string import join
import json
import csv
import json
### read the output from MAHOUT and collect into hash ###
def _group_by_id(rows):
    """Yield ``(row_id, indicators)`` for each run of adjacent rows sharing an id.

    ``rows`` is any iterable of sequences whose first field is the id and
    whose second field is a single indicator.  Rows are grouped only while
    they are adjacent, so the input must already be ordered by id for every
    id to be emitted exactly once.

    Raises ``IndexError`` if a non-empty row has fewer than two fields.
    """
    current_id = None  # None means "no row seen yet", so "" stays a usable id
    indicators = []
    for row in rows:
        if not row:
            # csv.reader yields [] for a blank line; nothing to collect.
            continue
        row_id = row[0]
        if current_id is not None and row_id != current_id:
            yield current_id, indicators
            indicators = []
        current_id = row_id
        # append, not extend: extend() would split the indicator string
        # into its individual characters.
        indicators.append(row[1])
    if current_id is not None:
        # Flush the final group; no later id change exists to trigger it.
        yield current_id, indicators


def indicator_json_lines(rows):
    """Yield the JSON output lines for ``rows``, two per distinct id run.

    For each group the first line is ``{"_id": <id>}`` and the second is
    ``{"indicators": [...], "numFields": <number of indicators>}``.
    """
    for row_id, indicators in _group_by_id(rows):
        yield json.dumps({"_id": row_id})
        yield json.dumps({"indicators": indicators,
                          "numFields": len(indicators)})


def main(path='x'):
    """Read the tab-separated file at ``path`` and print its JSON lines."""
    import sys  # local import: only needed to pick the open() mode below

    # The csv module expects a binary file on Python 2 but a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csv_file = open(path, 'r', newline='')
    else:
        csv_file = open(path, 'rb')
    with csv_file:
        for line in indicator_json_lines(csv.reader(csv_file, delimiter='\t')):
            print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment