Skip to content

Instantly share code, notes, and snippets.

@hombit
Last active February 10, 2018 08:53
Show Gist options
  • Save hombit/1995cd888b3d0ce4f92e49a19ecad386 to your computer and use it in GitHub Desktop.
Save hombit/1995cd888b3d0ce4f92e49a19ecad386 to your computer and use it in GitHub Desktop.
# http://homb.it/south/
import collections
def get_south_park_dict(path='all-seasons.csv'):
    """Parse the South Park script CSV into a nested dictionary.

    Each record starts on a line of the form
    ``Season,Episode,Character,<text>``; the text may continue over any
    number of following lines and the record ends with a line that holds
    only a double quote (``"``).  The header line
    ``Season,Episode,Character,Line`` is skipped.

    Args:
        path: Location of the CSV file.  Defaults to ``'all-seasons.csv'``
            in the current working directory.

    Returns:
        A dict shaped ``{season: {episode: {character: [quote, ...]}}}``.
        All keys are strings exactly as they appear in the file.  Every
        quote is the raw remainder of its record, i.e. it still contains
        the surrounding double quotes and the embedded newlines.

    Raises:
        ValueError: If a line that should start a record has fewer than
            three commas.
    """
    d = {}
    quote = ''
    with open(path, encoding='utf-8') as f:
        for line in f:
            # Compare without the newline so that a final line lacking a
            # trailing '\n' is still recognised.
            bare = line.rstrip('\n')
            if bare == 'Season,Episode,Character,Line':
                continue
            if quote == '':
                # First line of a record; the text itself may contain
                # commas, hence maxsplit=3.
                season, episode, char, quote = line.split(',', maxsplit=3)
            else:
                # Continuation of a multi-line quote.
                quote += line
            if bare == '"':
                # A lone double quote closes the record.
                d.setdefault(season, {}) \
                    .setdefault(episode, {}) \
                    .setdefault(char, []) \
                    .append(quote)
                quote = ''
    return d
def chatterboxes(d, season, episode, top_n):
    """Return the characters with the most lines in one episode.

    Args:
        d: Nested dict ``{season: {episode: {character: [quote, ...]}}}``.
        season: Season key to look up in ``d``.
        episode: Episode key to look up in ``d[season]``.
        top_n: How many characters to return; ``None`` returns all of them.

    Returns:
        A list of ``(character, number_of_quotes)`` tuples ordered from
        the most to the fewest quotes.

    Raises:
        KeyError: If ``season`` or ``episode`` is not present in ``d``.
    """
    line_counts = collections.Counter(
        {char: len(quotes) for char, quotes in d[season][episode].items()}
    )
    return line_counts.most_common(top_n)
def chatterbox(d, season, episode):
    """Return the name of the character with the most lines in an episode.

    Args:
        d: Nested dict ``{season: {episode: {character: [quote, ...]}}}``.
        season: Season key to look up in ``d``.
        episode: Episode key to look up in ``d[season]``.

    Returns:
        The character name that ``chatterboxes`` ranks first.

    Raises:
        KeyError: If ``season`` or ``episode`` is not present in ``d``.
        IndexError: If the episode contains no characters at all.
    """
    # chatterboxes returns [(name, count)]; keep only the name.
    return chatterboxes(d, season, episode, 1)[0][0]
def episodes_with_word(d, word, at_most):
    """List the episodes in which a phrase occurs often enough.

    Matching is case-insensitive and counts non-overlapping substring
    occurrences across every quote of every character in the episode.

    Args:
        d: Nested dict ``{season: {episode: {character: [quote, ...]}}}``
            with string season and episode keys.
        word: Word or phrase to search for.
        at_most: Threshold on the number of occurrences.  Despite the
            name, an episode is selected when its count is greater than
            or equal to this value.

    Returns:
        A list of ``'<season>.<episode>'`` strings, in the iteration order
        of ``d``.
    """
    needle = word.lower()
    episodes = []
    for season, season_dict in d.items():
        for episode, episode_dict in season_dict.items():
            hits = sum(
                quote.lower().count(needle)
                for quote_list in episode_dict.values()
                for quote in quote_list
            )
            if hits >= at_most:
                episodes.append(season + '.' + episode)
    return episodes
# Load the whole script once; every query below reuses the same dictionary.
d = get_south_park_dict()
# Four most talkative characters of season 4, episode 11.
print(chatterboxes(d, season='4', episode='11', top_n=4))
# The single most talkative character of that same episode.
print(chatterbox(d, season='4', episode='11'))
# Episodes in which the phrase occurs three or more times.
print(episodes_with_word(d, word='killed Kenny', at_most=3))
# http://homb.it/south/
import collections
# Build {season: {episode: {character: [quote, ...]}}} from the CSV.
# A record starts with "Season,Episode,Character,<text>", may span several
# lines, and is closed by a line holding only a double quote.
quote = ''
d = {}
with open('all-seasons.csv', encoding='utf-8') as f:
    for line in f:
        # Compare without the newline so that a final line lacking a
        # trailing '\n' is still recognised.
        bare = line.rstrip('\n')
        if bare == 'Season,Episode,Character,Line':
            continue
        if quote == '':
            # First line of a record; the text may contain commas itself.
            season, episode, char, quote = line.split(',', maxsplit=3)
        else:
            quote = quote + line
        if bare == '"':
            season_dict = d.setdefault(season, {})
            episode_dict = season_dict.setdefault(episode, {})
            quote_list = episode_dict.setdefault(char, [])
            quote_list.append(quote)
            quote = ''

# Every quote spoken by Stan in season 9, episode 4.
print(d['9']['4']['Stan'])

# The five seasons with the most episodes.
c = collections.Counter({k: len(v) for k, v in d.items()})
print(c.most_common(5))

# Number of quotes per character in one episode, most talkative first.
season = '10'
episod = '5'
c = collections.Counter({k: len(v) for k, v in d[season][episod].items()})
print(c.most_common())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment