Skip to content

Instantly share code, notes, and snippets.

@pudquick
Last active August 29, 2015 14:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pudquick/70b35c85ddb69a13cbdb to your computer and use it in GitHub Desktop.
Save pudquick/70b35c85ddb69a13cbdb to your computer and use it in GitHub Desktop.
Saturday morning fun.
import sys, os.path, csv
# Run me like: python word_magic.py inputfile.csv output_dir
# When True, each output row keeps the leading word column; when False the
# word is stripped and only the remaining columns are written.
KEEP_WORD = True


def _open_csv(path, mode):
    """Open *path* for use with the csv module.

    *mode* is 'r' or 'w'. The csv module needs binary mode on Python 2 but
    text mode with newline translation disabled on Python 3.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def group_rows_by_word(rows, keep_word=None):
    """Group CSV rows by the value in their first column.

    rows      -- iterable of rows, each a list of column strings.
    keep_word -- store the whole row when true, or the row without its first
                 column when false. Defaults to the module-level KEEP_WORD.

    Returns a dict mapping each first-column value to the list of rows
    (in input order) that started with it. Empty rows are skipped.
    """
    if keep_word is None:
        keep_word = KEEP_WORD
    word_files = {}
    for whole_line in rows:
        if not whole_line:
            # csv.reader yields [] for a blank line; there is no word to key on.
            continue
        the_word = whole_line[0]
        the_line = whole_line if keep_word else whole_line[1:]
        # setdefault creates the list the first time a word is seen.
        word_files.setdefault(the_word, []).append(the_line)
    return word_files


def split_csv_by_word(in_filename, out_dir, keep_word=None):
    """Split *in_filename* into one '<word>.csv' file per first-column value.

    in_filename -- path of the CSV file to read.
    out_dir     -- existing directory that receives the output files.
    keep_word   -- passed through to group_rows_by_word.

    Returns the dict of grouped rows that was written out.
    """
    with _open_csv(in_filename, 'r') as f:
        lines = list(csv.reader(f))

    word_files = group_rows_by_word(lines, keep_word)

    for each_word, entries in word_files.items():
        # NOTE: the word is used verbatim as a file name, so a value that
        # contains a path separator can resolve outside out_dir.
        out_path = os.path.join(out_dir, '%s.csv' % each_word)
        with _open_csv(out_path, 'w') as f:
            writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            writer.writerows(entries)
    return word_files


if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit("usage: python word_magic.py inputfile.csv output_dir")
    split_csv_by_word(sys.argv[1], os.path.abspath(sys.argv[2]))
    print("Done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment