Skip to content

Instantly share code, notes, and snippets.

@NivenT
Last active May 26, 2023 21:03
Show Gist options
  • Save NivenT/551893603dee6724ec21d9a12df1ad1c to your computer and use it in GitHub Desktop.
Save NivenT/551893603dee6724ec21d9a12df1ad1c to your computer and use it in GitHub Desktop.
Prints out a list of the shows you have seen on Netflix (+ when you first watched them). Supply the program a .csv downloaded from https://www.netflix.com/viewingactivity
#!/usr/bin/python3
# ^^^^^^^
# I originally wrote this in python2, and then later made the minimal edits necessary to get it to run on
# python3, so the code isn't always idiomatic.
import sys
import csv
from datetime import datetime
class Entry:
    """Viewing record for one show: first watch date and number of plays."""

    def __init__(self):
        # Start with zero plays and the latest representable date, so the
        # first real date handed to update() always replaces it.
        self.num_episodes = 0
        self.earliest_date = datetime.max

    def update(self, date):
        """Record one more viewing that happened on `date`.

        `date` is a string in month/day/two-digit-year form ('%m/%d/%y').
        The stored earliest date only moves backwards in time.
        """
        watched_on = datetime.strptime(date, '%m/%d/%y')
        if watched_on < self.earliest_date:
            self.earliest_date = watched_on
        self.num_episodes += 1
def print_usage(prog_name):
    """Print the command-line help text, using `prog_name` as the program name."""
    # One template per supported invocation form.
    invocations = (' {} (-h | --help)', ' {} VIEWING_ACTIVITY_FILE')
    print('Usage:')
    for template in invocations:
        print(template.format(prog_name))
    print('VIEWING_ACTIVITY_FILE is obtained from https://netflix.com/viewingactivity')
def extract_show_title(row):
    """Reduce a full viewing-activity title to just the show's name.

    `row` is the title string (e.g. 'Show: Season 1: Episode Name'). It is
    split on ':' and the segments are scanned left to right:

    * a segment starting with one of SEASON_WORDS ends the title just before
      that segment;
    * a segment starting with one of ENDING_WORDS ends the title just after
      that segment.

    If neither is found, the whole string (re-joined with ': ') is returned.
    The first segment is never treated as a season marker, so a show whose
    name happens to start with such a word (e.g. 'Partner Track') does not
    collapse to an empty title.
    """
    SEASON_WORDS = ('Season', 'Volume', 'Part', 'Chapter', 'Episode', 'act', 'Series')
    # This is definitely not a blatant indication of the shows I've watched...
    ENDING_WORDS = ('Comedians in Cars Getting Coffee', 'Secret City', 'Limited Series', 'Imposters', 'Zach Stone Is Gonna Be Famous', 'Arcane', 'The Chair', 'The IT Crowd', 'Scandal', "Marvel's", "Schitt's Creek", 'Gotham', 'The Sketch Show', 'The Society', 'Tiger King')
    parts = [part.strip() for part in row.split(':')]
    end_idx = len(parts)
    for i, part in enumerate(parts):
        # str.startswith accepts a tuple of prefixes. Segment 0 is always part
        # of the title, so only later segments can be season markers.
        if i > 0 and part.startswith(SEASON_WORDS):
            end_idx = i
            break
        if part.startswith(ENDING_WORDS):
            end_idx = i + 1
            break
    return ': '.join(parts[:end_idx])
def get_shows(file_name):
    """Read a viewing-activity CSV and group its rows by show.

    The first row is treated as a header and skipped. For every other row the
    first column is reduced to a show title with extract_show_title() and the
    second column is handed to Entry.update() as the viewing date.

    Rows with fewer than two columns (such as blank lines) are ignored.

    Returns a dict mapping show title -> Entry.
    """
    shows = {}  # dict Show Title -> Entry
    # The csv module asks for newline='' so that newlines embedded in quoted
    # fields are interpreted by the reader rather than by the file object.
    with open(file_name, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip header
        for row in reader:
            # csv.reader yields [] for a blank line; indexing it would raise.
            if len(row) < 2:
                continue
            show = extract_show_title(row[0])
            # Create the Entry on first sight of a show, then count this row.
            shows.setdefault(show, Entry()).update(row[1])
    return shows
def format_show_msg(show, entry):
    """Build the one-line report for a single show.

    `entry` supplies `num_episodes` (how many times the show was watched) and
    `earliest_date` (formatted as e.g. 'Jan 05, 2020').
    """
    if entry.num_episodes == 1:
        frequency = 'once'
    else:
        frequency = '{} times'.format(entry.num_episodes)
    first_seen = entry.earliest_date.strftime('%b %d, %Y')
    template = '"{}" has been watched {}. You first saw it on {}.'
    return template.format(show, frequency, first_seen)
def format_summary_msg(num_shows, start_date, fav, num):
    """Build the headline summary (ending in a newline).

    Reports how many shows were watched since `start_date`, and that `fav`
    was the most-watched one with `num` episodes.
    """
    since = start_date.strftime('%b %d, %Y')
    template = 'You have watched {} shows since {}. Of these, you have watched {} most often ({} episodes).\n'
    return template.format(num_shows, since, fav, num)
def print_and_write(out, s):
    """Echo the string `s` to stdout and append it, newline-terminated, to `out`.

    `out` is any object with a write() method (e.g. an open text file).
    """
    print(s)
    # Mirror print()'s trailing newline in the written copy.
    out.write(''.join((s, '\n')))
# Script entry point: summarise the viewing-activity CSV named on the command
# line, printing the report and also saving it to netflix_shows.txt.
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] in ['-h', '--help']:
        print_usage(sys.argv[0])
    else:
        # Parse the input before touching the output file, so a bad input
        # does not truncate a report left over from a previous run.
        entries = get_shows(sys.argv[1])
        if not entries:
            # max() and shows[-1] below would raise on an empty history.
            print('No viewing activity found in {}'.format(sys.argv[1]))
        else:
            # Most recently started show first; the last item is therefore
            # the show with the oldest first-watch date.
            shows = sorted(entries, key=lambda s: entries[s].earliest_date, reverse=True)
            favorite = max(entries, key=lambda s: entries[s].num_episodes)
            fav_count = entries[favorite].num_episodes
            earliest_date = entries[shows[-1]].earliest_date
            # The with-block guarantees the report file is flushed and closed.
            with open('netflix_shows.txt', 'w') as out:
                print_and_write(out, format_summary_msg(len(shows), earliest_date, favorite, fav_count))
                for show in shows:
                    print_and_write(out, format_show_msg(show, entries[show]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment