# andychase/processing_script.py

## processing_script.py
import sys
import csv
from collections import Counter


def get_themes(themes, themes_db):
    """Yield a ``(percent_text, label)`` pair for each theme in *themes*.

    The percentage is the theme's count in *themes_db* as a share of the sum
    of all positive counts in *themes_db*, rounded down to a whole number and
    formatted like ``"42%"``.  The label is the theme name passed through
    ``str.capitalize``.

    Args:
        themes: Iterable of theme names (strings) to report on.
        themes_db: ``collections.Counter`` mapping theme name to its count.

    Yields:
        Tuples ``(percent_text, label)``, one per theme, in the iteration
        order of *themes*.  When *themes_db* holds no positive counts every
        theme is reported as ``"0%"`` instead of raising
        ``ZeroDivisionError``.
    """
    # Loop-invariant total.  Counter.elements() skips counts below one, so
    # summing only the positive counts gives the same number without
    # building a throwaway list for every theme.
    total = sum(count for count in themes_db.values() if count > 0)
    for theme in themes:
        # Integer arithmetic avoids float truncation errors such as
        # int(29 / 100 * 100) == 28.
        percent = themes_db[theme] * 100 // total if total else 0
        yield f"{percent}%", theme.capitalize()


def main(rows):
    themes_db = Counter()
    for row in rows:
        excerpt, source, location, all_themes = row[:4]
        themes = {i.strip().strip(")").lower() for i in all_themes.split("(")} - {"+", "-"}
        themes_db.update({theme: 1 for theme in themes})
    for row in rows:
        excerpt, source, location, all_themes = row[:4]
        themes = {i.strip(" /-+)(").lower() for i in all_themes.split("(")} - {"+", "-", ""}
        for theme in get_themes(themes, themes_db):
            yield excerpt, source, location, theme[0], theme[1]


if __name__ == "__main__":
    # Usage: processing_script.py INPUT_CSV OUTPUT_CSV
    #
    # Both files are opened with newline="" as the csv module documents for
    # file objects handed to csv.reader / csv.writer; without it the writer
    # can emit an extra "\r" per row on platforms that translate "\n".
    with open(sys.argv[1], "r", newline="") as f, open(sys.argv[2], "w", newline="") as f2:
        reader = csv.reader(f)
        # Discard the input header row; the default keeps an empty input
        # file from raising StopIteration.
        next(reader, None)
        writer = csv.writer(f2)
        # Spelled out as a list so the column names do not depend on literal
        # tab characters surviving inside a string.
        # NOTE(review): main() yields five fields per row but only four
        # column names are written here -- confirm the intended header.
        writer.writerow(["Verbatim Excerpt", "Source", "Location", "Benefit Themes"])
        writer.writerows(main(list(reader)))
	import sys
	import csv
	from collections import Counter


	def get_themes(themes, themes_db):
		"""Yield a ``(percent_text, label)`` pair for each theme in *themes*.

		The percentage is the theme's count in *themes_db* as a share of the
		sum of all positive counts in *themes_db*, rounded down to a whole
		number and formatted like ``"42%"``.  The label is the theme name
		passed through ``str.capitalize``.

		Args:
			themes: Iterable of theme names (strings) to report on.
			themes_db: ``collections.Counter`` mapping theme name to its count.

		Yields:
			Tuples ``(percent_text, label)``, one per theme, in the iteration
			order of *themes*.  When *themes_db* holds no positive counts
			every theme is reported as ``"0%"`` instead of raising
			``ZeroDivisionError``.
		"""
		# Loop-invariant total.  Counter.elements() skips counts below one, so
		# summing only the positive counts gives the same number without
		# building a throwaway list for every theme.
		total = sum(count for count in themes_db.values() if count > 0)
		for theme in themes:
			# Integer arithmetic avoids float truncation errors such as
			# int(29 / 100 * 100) == 28.
			percent = themes_db[theme] * 100 // total if total else 0
			yield f"{percent}%", theme.capitalize()


	def main(rows):
	themes_db = Counter()
	for row in rows:
	excerpt, source, location, all_themes = row[:4]
	themes = {i.strip().strip(")").lower() for i in all_themes.split("(")} - {"+", "-"}
	themes_db.update({theme: 1 for theme in themes})
	for row in rows:
	excerpt, source, location, all_themes = row[:4]
	themes = {i.strip(" /-+)(").lower() for i in all_themes.split("(")} - {"+", "-", ""}
	for theme in get_themes(themes, themes_db):
	yield excerpt, source, location, theme[0], theme[1]


	if __name__ == "__main__":
		# Usage: processing_script.py INPUT_CSV OUTPUT_CSV
		#
		# Both files are opened with newline="" as the csv module documents
		# for file objects handed to csv.reader / csv.writer; without it the
		# writer can emit an extra "\r" per row on platforms that translate
		# "\n".
		with open(sys.argv[1], "r", newline="") as f, open(sys.argv[2], "w", newline="") as f2:
			reader = csv.reader(f)
			# Discard the input header row; the default keeps an empty input
			# file from raising StopIteration.
			next(reader, None)
			writer = csv.writer(f2)
			# Spelled out as a list: splitting the space-joined string on " "
			# broke the two-word names into six columns.
			# NOTE(review): main() yields five fields per row but only four
			# column names are written here -- confirm the intended header.
			writer.writerow(["Verbatim Excerpt", "Source", "Location", "Benefit Themes"])
			writer.writerows(main(list(reader)))