Last active
May 26, 2023 21:03
-
-
Save NivenT/551893603dee6724ec21d9a12df1ad1c to your computer and use it in GitHub Desktop.
Prints out a list of the shows you have seen on Netflix (+ when you first watched them). Supply the program a .csv downloaded from https://www.netflix.com/viewingactivity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# ^^^^^^^ | |
# I originally wrote this in python2, and then later made the minimal edits necessary to get it to run on
# python3, so the code isn't always idiomatic.
import sys | |
import csv | |
from datetime import datetime | |
class Entry:
    """Running viewing statistics for a single show."""

    def __init__(self):
        # Earliest viewing date recorded so far; starts at datetime.max so
        # that the first update() always replaces it.
        self.earliest_date = datetime.max
        # Number of viewing records folded in through update().
        self.num_episodes = 0

    def update(self, date):
        """Record one more viewing that took place on ``date``.

        ``date`` is a string in ``%m/%d/%y`` form (for example ``'5/26/23'``).
        ``datetime.strptime`` raises ``ValueError`` if it does not match, in
        which case the entry is left untouched.
        """
        watched_on = datetime.strptime(date, '%m/%d/%y')
        if watched_on < self.earliest_date:
            self.earliest_date = watched_on
        self.num_episodes += 1
def print_usage(prog_name):
    """Print the command-line usage text for the script invoked as ``prog_name``."""
    usage_lines = [
        'Usage:',
        ' {} (-h | --help)'.format(prog_name),
        ' {} VIEWING_ACTIVITY_FILE'.format(prog_name),
        'VIEWING_ACTIVITY_FILE is obtained from https://netflix.com/viewingactivity',
    ]
    for line in usage_lines:
        print(line)
def extract_show_title(row):
    """Return the show-title portion of one viewing-activity title string.

    ``row`` is the colon-separated title text (for example
    ``'Show: Season 1: Episode Name'``). The string is split on ``':'``, each
    segment is stripped, and the leading segments that make up the show title
    are re-joined with ``': '``. If no marker is found the whole (normalized)
    string is returned, which is what happens for films.

    The cut-off point is the first segment that either
      * starts with one of SEASON_WORDS (that segment and everything after it
        is dropped), or
      * starts with one of ENDING_WORDS (that segment is kept, everything
        after it is dropped).
    """
    # Prefixes marking the start of the season/episode part of the string.
    SEASON_WORDS = ('Season', 'Volume', 'Part', 'Chapter', 'Episode', 'act', 'Series')
    # This is definitely not a blatant indication of the shows I've watched...
    # Prefixes of segments known to be the *last* segment of a show title.
    ENDING_WORDS = ('Comedians in Cars Getting Coffee', 'Secret City', 'Limited Series', 'Imposters', 'Zach Stone Is Gonna Be Famous', 'Arcane', 'The Chair', 'The IT Crowd', 'Scandal', "Marvel's", "Schitt's Creek", 'Gotham', 'The Sketch Show', 'The Society', 'Tiger King')

    parts = [part.strip() for part in row.split(':')]
    end_idx = len(parts)
    for i, part in enumerate(parts):
        # The first segment is always part of the title. Without the i > 0
        # guard, shows whose names merely begin with a season word
        # ("Partner Track", "Episodes", ...) would be reduced to an empty
        # title and lumped together.
        if i > 0 and part.startswith(SEASON_WORDS):
            end_idx = i
            break
        if part.startswith(ENDING_WORDS):
            end_idx = i + 1
            break
    return ': '.join(parts[:end_idx])
def get_shows(file_name):
    """Read a viewing-activity CSV and aggregate it per show.

    The first row of ``file_name`` is treated as a header and skipped. For
    every other row, column 0 is the title string (reduced to a show title
    with ``extract_show_title``) and column 1 is the viewing date passed to
    ``Entry.update``.

    Returns a dict mapping show title -> Entry.
    """
    shows = {}  # dict Show Title -> Entry
    # newline='' lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are parsed correctly.
    with open(file_name, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip header
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to record.
                continue
            show = extract_show_title(row[0])
            entry = shows.get(show)
            if entry is None:
                entry = shows[show] = Entry()
            entry.update(row[1])
    return shows
def format_show_msg(show, entry):
    """Build the one-line report for a single show.

    ``entry`` supplies ``num_episodes`` (how many times the show appears) and
    ``earliest_date`` (formatted as ``'%b %d, %Y'``).
    """
    if entry.num_episodes == 1:
        frequency = 'once'
    else:
        frequency = '{} times'.format(entry.num_episodes)
    first_seen = entry.earliest_date.strftime('%b %d, %Y')
    template = '"{}" has been watched {}. You first saw it on {}.'
    return template.format(show, frequency, first_seen)
def format_summary_msg(num_shows, start_date, fav, num):
    """Build the summary line that heads the report.

    ``num_shows`` is the number of distinct shows, ``start_date`` the earliest
    viewing date (formatted as ``'%b %d, %Y'``), ``fav`` the most-watched show
    and ``num`` its count. The returned string ends with a newline.
    """
    since = start_date.strftime('%b %d, %Y')
    template = 'You have watched {} shows since {}. Of these, you have watched {} most often ({} episodes).\n'
    return template.format(num_shows, since, fav, num)
def print_and_write(out, s):
    """Echo ``s`` to standard output, then write it plus a newline to ``out``.

    ``out`` is any object with a ``write`` method (e.g. an open text file).
    """
    print(s)
    line = s + '\n'
    out.write(line)
# Script entry point: read the viewing-activity CSV named on the command line,
# then print a report and mirror it into netflix_shows.txt.
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] in ['-h', '--help']:
        print_usage(sys.argv[0])
    else:
        # Parse the input before opening the output file, so a missing or
        # malformed input does not truncate a report from a previous run.
        entries = get_shows(sys.argv[1])
        if not entries:
            # max() and shows[-1] below would raise on an empty collection.
            sys.exit('No viewing activity found in {}'.format(sys.argv[1]))

        # Most recently started shows first; the last element is therefore
        # the show with the earliest first-viewing date.
        shows = sorted(entries, key=lambda s: entries[s].earliest_date, reverse=True)
        favorite = max(entries, key=lambda s: entries[s].num_episodes)
        fav_count = entries[favorite].num_episodes
        earliest_date = entries[shows[-1]].earliest_date

        # The with-block guarantees the report file is flushed and closed.
        with open('netflix_shows.txt', 'w') as out:
            print_and_write(out, format_summary_msg(len(shows), earliest_date, favorite, fav_count))
            for show in shows:
                print_and_write(out, format_show_msg(show, entries[show]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment