Skip to content

Instantly share code, notes, and snippets.

@aflansburg
Created October 20, 2017 21:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflansburg/3f72f6234042f214bd68621f3f79c45d to your computer and use it in GitHub Desktop.
Save aflansburg/3f72f6234042f214bd68621f3f79c45d to your computer and use it in GitHub Desktop.
Reads a CSV column, writes words and their counts to new CSV
import re, csv
import pandas as pd
from collections import Counter
from random import randint
csvfile = 'mycsvfile.csv'  # input CSV; the script reads its "Title" column
# Alternative pattern: r'\w+' would also keep tokens that contain digits.
regex = r"\b[^\d\W]+\b"  # omits words containing digits, like 4WD or a part number 123ABCD
commonWords = ['a', 'with', 'the', 'and', 'set', 'foot', 'for', 'inch', 'on', 'models',
               'model']

# Compiled once so the per-title loop does not repeat the pattern lookup.
_WORD_RE = re.compile(regex)
# Every lowercase word to drop: the common words plus the two words the
# original script filtered out explicitly ('rough' and 'country').
_EXCLUDED_WORDS = frozenset(commonWords) | {'rough', 'country'}


def extract_words(titles):
    """Return the lowercase words found in *titles*, minus excluded words.

    Args:
        titles: iterable of title values. Non-string entries (for example the
            float NaN that pandas yields for an empty cell) are skipped
            instead of raising ``TypeError`` inside ``re.findall``.

    Returns:
        A list of lowercase words in order of appearance, duplicates kept so
        they can be counted afterwards.
    """
    found = []
    for title in titles:
        if not isinstance(title, str):
            continue
        for match in _WORD_RE.findall(title):
            word = match.lower()
            if word not in _EXCLUDED_WORDS:
                found.append(word)
    return found


def write_counts(counts, path):
    """Write the word/count pairs in *counts* to *path* as a two-column CSV.

    Args:
        counts: mapping of word -> number of occurrences.
        path: destination file path; an existing file is overwritten.

    Raises:
        PermissionError: propagated from ``open`` when *path* is not writable.
    """
    with open(path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        # Header spelling is kept as-is so previously exported files still match.
        writer.writerow(["Word", "Occurence"])
        writer.writerows(counts.items())


def main():
    """Read titles from ``csvfile``, print the words, and export their counts."""
    data = pd.read_csv(csvfile)
    words = extract_words(data.Title)
    for word in words:
        print(word)
    print(f'Total count of words: {len(words)}\n')
    counts = Counter(words)
    try:
        write_counts(counts, 'exported_words.csv')
    except PermissionError:
        # Fall back to a randomly suffixed file name when the default target
        # cannot be written (presumably because it is open in another
        # program -- confirm).
        write_counts(counts, 'exported_words' + str(randint(100, 500)) + '.csv')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment