Skip to content

Instantly share code, notes, and snippets.

@andychase
Created October 10, 2018 04:19
Show Gist options
  • Save andychase/4917c7e0c41df5b8f0d18efda0d8c455 to your computer and use it in GitHub Desktop.
Save andychase/4917c7e0c41df5b8f0d18efda0d8c455 to your computer and use it in GitHub Desktop.
import sys
import csv
from collections import Counter
def get_themes(themes, themes_db):
    """Yield a ``(percent, name)`` pair of display strings for each theme.

    Args:
        themes: Iterable of theme-name strings to report on.
        themes_db: Mapping of theme name to occurrence count (``main``
            passes a ``collections.Counter``).

    Yields:
        ``("NN%", "Name")``: the theme's share of all counted theme
        occurrences as a whole-number percentage (rounded down), and the
        theme name capitalized.
    """
    # Grand total of occurrences, computed once instead of rebuilding a list
    # of every counted element for each theme. Only positive counts
    # contribute, matching what Counter.elements() would enumerate.
    total = sum(count for count in themes_db.values() if count > 0)
    for theme in themes:
        # Integer arithmetic avoids float error such as 29/100*100 evaluating
        # to 28.999..., and an empty database reports 0% rather than
        # dividing by zero.
        percent = int(100 * themes_db[theme] // total) if total else 0
        yield f"{percent}%", f"{theme.capitalize()}"
def _parse_themes(all_themes):
    """Return the set of normalized theme names found in one themes cell.

    The cell is split on "(" and each piece is trimmed of spaces, slashes,
    "+"/"-" markers and parentheses, then lower-cased; pieces that end up
    empty are dropped.
    """
    pieces = (piece.strip(" /-+)(").lower() for piece in all_themes.split("("))
    return {piece for piece in pieces if piece}


def main(rows):
    """Expand each input row into one output row per theme it mentions.

    Args:
        rows: Iterable of sequences whose first four items are the excerpt,
            source, location and the raw themes text. Extra items are
            ignored; a row with fewer than four items raises ``ValueError``.

    Yields:
        ``(excerpt, source, location, percent, theme)`` tuples, where
        ``percent`` and ``theme`` are the display strings produced by
        ``get_themes`` from the counts gathered over all rows.
    """
    # The rows are walked twice (count, then emit), so materialize them in
    # case the caller handed in a one-shot iterator.
    rows = list(rows)

    # Pass 1: count in how many rows each theme appears. The same parser is
    # used in both passes so the keys counted here are exactly the keys
    # looked up when emitting.
    themes_db = Counter()
    for row in rows:
        _excerpt, _source, _location, all_themes = row[:4]
        themes_db.update(_parse_themes(all_themes))

    # Pass 2: emit one output row per (input row, theme). Themes are sorted
    # so the output order does not depend on set iteration order.
    for row in rows:
        excerpt, source, location, all_themes = row[:4]
        themes = sorted(_parse_themes(all_themes))
        for percent, name in get_themes(themes, themes_db):
            yield excerpt, source, location, percent, name
if __name__ == "__main__":
    # Usage: <script> INPUT_CSV OUTPUT_CSV
    # Both files are opened with newline="" as the csv module requires, so
    # quoted fields with embedded newlines are read correctly and no extra
    # blank lines are written on platforms that translate line endings.
    with open(sys.argv[1], "r", newline="") as f, open(sys.argv[2], "w", newline="") as f2:
        reader = csv.reader(f)
        # Skip the input header row; the default keeps an empty input file
        # from raising StopIteration.
        next(reader, None)
        writer = csv.writer(f2)
        # Five header cells, one per field that main() yields. Splitting the
        # header text on spaces produced six cells ("Verbatim", "Excerpt",
        # ...) and shifted every label after the first off its column.
        writer.writerow(["Verbatim Excerpt", "Source", "Location", "Benefit", "Themes"])
        writer.writerows(main(list(reader)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment