Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@drjwbaker
Forked from benosteen/add_numbers.py
Created October 3, 2013 13:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drjwbaker/6809457 to your computer and use it in GitHub Desktop.
Save drjwbaker/6809457 to your computer and use it in GitHub Desktop.
add_numbers.py
import csv
import json
# Source CSV, and the destination name derived from it: the last four
# characters (the ".csv" suffix) are swapped for "_numbered.csv".
INPUTFILE = "History_Journal_Articles_KW.csv"
OUTPUTFILE = "{0}_numbered.csv".format(INPUTFILE[:-4])

# Plain-text handles: the source is read-only, the destination is
# created/truncated for writing.
# NOTE(review): on Python 3 the csv documentation recommends opening csv
# files with newline="" -- confirm the target interpreter before changing.
in_file = open(INPUTFILE, "r")
out_file = open(OUTPUTFILE, "w")

# csv reader/writer layered over those handles
rows_in = csv.reader(in_file)
rows_out = csv.writer(out_file)

# Lookup tables filled in while processing: value seen in a row -> the
# number that was assigned to it.
topwords = {}
journals = {}
# little function to get or generate a number for a word
def get_or_set_number(word, words, counter):
    """Return the number assigned to *word*, allocating a new one if needed.

    Args:
        word: Key to look up.
        words: Dict mapping already-seen keys to their numbers. A new entry
            is added in place when *word* has not been seen before.
        counter: Highest number handed out so far.

    Returns:
        A ``(number, counter)`` tuple: the number belonging to *word* and
        the (possibly incremented) counter to pass to the next call.
    """
    # Already known: reuse its number and leave the counter untouched.
    if word in words:
        return words[word], counter

    # First sighting: take the next number and remember it for later calls.
    counter += 1
    words[word] = counter
    return counter, counter
# Main pass: copy the header through, then rewrite every data row with the
# numbers assigned to its word and journal values.
numberofrows = 0
counter = 0
try:
    # The builtin next() works on Python 2.6+ and Python 3; the reader's
    # .next() method exists only on Python 2. The None default turns an
    # empty input file into "no header, no rows" instead of StopIteration.
    headers = next(rows_in, None)
    if headers is not None:
        rows_out.writerow(headers)

    for row in rows_in:
        # Which column is the word in? (Computer counting very often starts at zero, not one!)
        # row[0] is looked up in the topwords table; its number overwrites row[1].
        topword_number, counter = get_or_set_number(row[0], topwords, counter)
        row[1] = topword_number
        # row[6] is looked up in the journals table; its number overwrites row[7].
        # The same counter feeds both tables, so a number is never shared
        # between a top word and a journal.
        journal_number, counter = get_or_set_number(row[6], journals, counter)
        row[7] = journal_number
        rows_out.writerow(row)
        numberofrows += 1
finally:
    # Close both files even when a row raises (e.g. a row with fewer than
    # eight columns), so the output written so far is flushed to disk.
    out_file.close()
    in_file.close()

print("Number of ids: {0}".format(counter))
print("Number of rows: {0}".format(numberofrows))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment