Skip to content

Instantly share code, notes, and snippets.

@liamstrilchuk
Created September 14, 2020 17:25
Show Gist options
  • Save liamstrilchuk/72d5b44357ed157d2893ba796e96cc91 to your computer and use it in GitHub Desktop.
Save liamstrilchuk/72d5b44357ed157d2893ba796e96cc91 to your computer and use it in GitHub Desktop.
Compute the average word and character counts of post titles per subreddit
import json, math
def main(
    posts_path="filteredposts.txt",
    subs_path="subreddits.csv",
    out_path="lengthresults.csv",
    top_n=30,
    min_posts=20,
):
    """Report the average title length, in words and characters, per subreddit.

    Reads a JSON array of posts and a CSV of subreddit sizes, then for each
    of the ``top_n`` largest subreddits that has more than ``min_posts``
    posts, prints the average character count and appends a
    ``name,avg_words,avg_chars`` row to the output CSV. Rows are emitted in
    ascending order of average character count, and averages are truncated
    (floored) to one decimal place.

    Args:
        posts_path: JSON file holding a list of entries; element ``[0]`` of
            each entry is the text that is measured and element ``[1]`` is
            the subreddit name it is grouped under.
        subs_path: CSV file with one header row, then ``name,count`` rows.
        out_path: CSV file that result rows are appended to. It is not
            created when there are no results.
        top_n: How many of the largest subreddits (by the CSV's second
            column) are eligible for the report.
        min_posts: A subreddit is reported only if it has strictly more
            posts than this.

    Raises:
        OSError: If an input file cannot be read or the output not written.
        ValueError: If the JSON is malformed or a CSV count is not an integer.
    """
    with open(posts_path, "r", encoding="utf-8") as posts_file:
        posts = json.load(posts_file)

    with open(subs_path, "r", encoding="utf-8") as subs_file:
        subs_lines = subs_file.read().split("\n")

    # Skip the header row. A plain [1:] slice is used because the earlier
    # negative-index form degenerated to "everything" for a header-only file.
    sub_sizes = {}
    for row in subs_lines[1:]:
        if len(row) < 3:
            # Blank or trailing lines carry no usable name,count pair.
            continue
        fields = row.split(",")
        sub_sizes[fields[0]] = int(fields[1])

    # A set gives O(1) membership checks when filtering the results below.
    allowed_subs = set(sorted(sub_sizes, key=sub_sizes.get, reverse=True)[:top_n])

    # Per subreddit: [total characters, total words, number of posts].
    totals = {}
    for post in posts:
        title, sub = post[0], post[1]
        entry = totals.setdefault(sub, [0, 0, 0])
        entry[0] += len(title)
        # Splitting on a single space is deliberate: it keeps the original
        # word-count definition (an empty title counts as one "word").
        entry[1] += len(title.split(" "))
        entry[2] += 1

    ranked = sorted(totals, key=lambda name: totals[name][0] / totals[name][2])
    results = [
        name
        for name in ranked
        if name in allowed_subs and totals[name][2] > min_posts
    ]

    rows = []
    for name in results:
        chars, words, count = totals[name]
        avg_chars = math.floor(chars / count * 10) / 10
        avg_words = math.floor(words / count * 10) / 10
        print(f"{name}: {avg_chars}")
        rows.append(f"{name},{avg_words},{avg_chars}\n")

    # Open the output once, and only if there is something to append, so an
    # empty run does not create the file.
    if rows:
        with open(out_path, "a", encoding="utf-8") as out_file:
            out_file.writelines(rows)
# Run the analysis only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment