NivenT/netflix_shows.py

## netflix_shows.py
#!/usr/bin/python3

#          ^^^^^^^
# I originally wrote this in python2, and then later made the minimal edits necessary to get it to run on
# python3, so the code isn't always idiomatic.

import sys
import csv
from datetime import datetime

class Entry:
    """Running statistics for one show: earliest viewing date and view count."""

    def __init__(self):
        # datetime.max acts as a sentinel: every parsed date compares earlier.
        self.num_episodes = 0
        self.earliest_date = datetime.max

    def update(self, date):
        """Record one more viewing that took place on `date`.

        `date` is a string in month/day/two-digit-year form (e.g. '3/14/20').
        datetime.strptime raises ValueError if it does not match that format,
        in which case the counters are left untouched.
        """
        watched_on = datetime.strptime(date, '%m/%d/%y')
        if watched_on < self.earliest_date:
            self.earliest_date = watched_on
        self.num_episodes += 1

def print_usage(prog_name):
    """Print the command-line help text, using `prog_name` as the program name."""
    usage_lines = [
        'Usage:',
        '  {} (-h | --help)'.format(prog_name),
        '  {} VIEWING_ACTIVITY_FILE'.format(prog_name),
        'VIEWING_ACTIVITY_FILE is obtained from https://netflix.com/viewingactivity',
    ]
    for line in usage_lines:
        print(line)

def extract_show_title(row):
    """Reduce a viewing-activity title to the name of the show it belongs to.

    `row` is the title string of one activity record, e.g.
    'Breaking Bad: Season 1: Pilot'. It is split on ':' and the leading parts
    are kept up to (not including) the first part that starts with a season
    word, or up to (and including) the first part that starts with one of
    the hard-coded show names. If neither occurs, the whole title is returned
    with its parts re-joined by ': '.

    The first part is never treated as a season marker: a title such as
    'Partner Track: Season 1: ...' would otherwise collapse to the empty
    string and be merged with every other show that does the same.
    """
    # NOTE(review): 'act' is lower-case and startswith() is case-sensitive, so
    # a part like 'Act 1' is not matched -- confirm this is intended.
    SEASON_WORDS = ('Season', 'Volume', 'Part', 'Chapter', 'Episode', 'act', 'Series')
    # This is definitely not a blatant indication of the shows I've watched...
    ENDING_WORDS = ('Comedians in Cars Getting Coffee', 'Secret City', 'Limited Series', 'Imposters', 'Zach Stone Is Gonna Be Famous', 'Arcane', 'The Chair', 'The IT Crowd', 'Scandal', "Marvel's", "Schitt's Creek", 'Gotham', 'The Sketch Show', 'The Society', 'Tiger King')

    parts = [part.strip() for part in row.split(':')]
    for i, part in enumerate(parts):
        # Season markers only make sense after the show name (index 0).
        if i > 0 and part.startswith(SEASON_WORDS):
            return ': '.join(parts[:i])
        # Known show names end the title and are themselves part of it.
        if part.startswith(ENDING_WORDS):
            return ': '.join(parts[:i + 1])
    return ': '.join(parts)

def get_shows(file_name):
    """Parse a viewing-activity CSV file into per-show statistics.

    The first line of the file is skipped as a header. For every following
    row, column 0 is reduced to a show title with extract_show_title() and
    column 1 (the viewing date) is fed to Entry.update().

    Returns a dict mapping show title -> Entry.
    Raises OSError if the file cannot be opened, and ValueError (from
    Entry.update) if a date column does not parse.
    """
    shows = {}  # dict Show Title -> Entry
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for file objects (quoted fields may embed newlines).
    with open(file_name, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip header
        for row in reader:
            # Blank lines are yielded as [] and truncated lines lack the date
            # column; indexing either would raise IndexError.
            if len(row) < 2:
                continue
            show = extract_show_title(row[0])
            if show not in shows:
                shows[show] = Entry()
            shows[show].update(row[1])
    return shows

def format_show_msg(show, entry):
    """Build the one-line report for a single show.

    `entry` supplies `num_episodes` (rendered as 'once' when it equals 1,
    otherwise as '<n> times') and `earliest_date` (rendered as e.g.
    'Mar 05, 2020').
    """
    if entry.num_episodes == 1:
        frequency = 'once'
    else:
        frequency = '{} times'.format(entry.num_episodes)
    first_seen = entry.earliest_date.strftime('%b %d, %Y')
    return '"{}" has been watched {}. You first saw it on {}.'.format(show, frequency, first_seen)

def format_summary_msg(num_shows, start_date, fav, num):
    """Build the summary paragraph printed at the top of the report.

    `num_shows` is the number of distinct shows, `start_date` the datetime of
    the earliest viewing, `fav` the most-watched title and `num` its view
    count. The returned string ends with a newline.
    """
    since = start_date.strftime('%b %d, %Y')
    template = ('You have watched {} shows since {}. '
                'Of these, you have watched {} most often ({} episodes).\n')
    return template.format(num_shows, since, fav, num)

def print_and_write(out, s):
    """Echo the string `s` to stdout and append it, newline-terminated, to `out`.

    `out` only needs a write() method.
    """
    print(s)
    line = s + '\n'
    out.write(line)

# Script entry point: summarize the viewing-activity CSV named on the command
# line, printing the report and mirroring it into netflix_shows.txt.
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] in ['-h', '--help']:
        print_usage(sys.argv[0])
    else:
        # Parse first, so a bad input file does not truncate an existing report.
        entries = get_shows(sys.argv[1])
        if not entries:
            # max() and shows[-1] below would raise on an empty history.
            print('No viewing activity found in {}'.format(sys.argv[1]))
        else:
            # Most recently discovered show first; the last element is therefore
            # the show with the overall earliest viewing date.
            shows = sorted(entries, key=lambda s: entries[s].earliest_date, reverse=True)

            favorite = max(entries, key=lambda s: entries[s].num_episodes)
            fav_count = entries[favorite].num_episodes
            earliest_date = entries[shows[-1]].earliest_date

            # The with-block guarantees the report is flushed and closed.
            with open('netflix_shows.txt', 'w') as out:
                print_and_write(out, format_summary_msg(len(shows), earliest_date, favorite, fav_count))
                for show in shows:
                    print_and_write(out, format_show_msg(show, entries[show]))
	#!/usr/bin/python3

	# ^^^^^^^
	# I originally wrote this in python2, and then later made the minimal edits necessary to get it to run on
	# python3, so the code isn't always idiomatic.

	import sys
	import csv
	from datetime import datetime

	class Entry:
	    """Running statistics for one show: earliest viewing date and view count."""

	    def __init__(self):
	        # datetime.max acts as a sentinel: every parsed date compares earlier.
	        self.earliest_date = datetime.max
	        self.num_episodes = 0

	    def update(self, date):
	        """Record one more viewing that took place on `date`.

	        `date` is a string in month/day/two-digit-year form (e.g. '3/14/20').
	        datetime.strptime raises ValueError if it does not match that format.
	        """
	        self.earliest_date = min(self.earliest_date, datetime.strptime(date, '%m/%d/%y'))
	        self.num_episodes += 1

	def print_usage(prog_name):
	    """Print the command-line help text, using `prog_name` as the program name."""
	    print('Usage:')
	    # A plain '|' is used here: '\|' is an invalid escape sequence and would
	    # print a stray backslash.
	    print('  {} (-h | --help)'.format(prog_name))
	    print('  {} VIEWING_ACTIVITY_FILE'.format(prog_name))
	    print('VIEWING_ACTIVITY_FILE is obtained from https://netflix.com/viewingactivity')

	def extract_show_title(row):
	    """Reduce a viewing-activity title to the name of the show it belongs to.

	    `row` is the title string of one activity record, e.g.
	    'Breaking Bad: Season 1: Pilot'. It is split on ':' and the leading parts
	    are kept up to (not including) the first part that starts with a season
	    word, or up to (and including) the first part that starts with one of
	    the hard-coded show names. If neither occurs, the whole title is returned
	    with its parts re-joined by ': '.

	    The first part is never treated as a season marker, so a title such as
	    'Partner Track: Season 1: ...' does not collapse to the empty string.
	    """
	    SEASON_WORDS = ('Season', 'Volume', 'Part', 'Chapter', 'Episode', 'act', 'Series')
	    # This is definitely not a blatant indication of the shows I've watched...
	    ENDING_WORDS = ('Comedians in Cars Getting Coffee', 'Secret City', 'Limited Series', 'Imposters', 'Zach Stone Is Gonna Be Famous', 'Arcane', 'The Chair', 'The IT Crowd', 'Scandal', "Marvel's", "Schitt's Creek", 'Gotham', 'The Sketch Show', 'The Society', 'Tiger King')

	    parts = [part.strip() for part in row.split(':')]
	    for i, part in enumerate(parts):
	        # Season markers only make sense after the show name (index 0).
	        if i > 0 and part.startswith(SEASON_WORDS):
	            return ': '.join(parts[:i])
	        # Known show names end the title and are themselves part of it.
	        if part.startswith(ENDING_WORDS):
	            return ': '.join(parts[:i + 1])
	    return ': '.join(parts)

	def get_shows(file_name):
	    """Parse a viewing-activity CSV file into per-show statistics.

	    The first line of the file is skipped as a header. For every following
	    row, column 0 is reduced to a show title with extract_show_title() and
	    column 1 (the viewing date) is fed to Entry.update().

	    Returns a dict mapping show title -> Entry.
	    """
	    shows = {}  # dict Show Title -> Entry
	    # newline='' leaves newline handling to the csv module, as its
	    # documentation requires for file objects.
	    with open(file_name, 'r', newline='') as csv_file:
	        reader = csv.reader(csv_file)
	        next(reader, None)  # skip header
	        for row in reader:
	            # Blank or truncated lines have no date column to index.
	            if len(row) < 2:
	                continue
	            show = extract_show_title(row[0])
	            if show not in shows:
	                shows[show] = Entry()
	            shows[show].update(row[1])
	    return shows

	def format_show_msg(show, entry):
	    """Build the one-line report for a single show.

	    `entry` supplies `num_episodes` (rendered as 'once' when it equals 1,
	    otherwise as '<n> times') and `earliest_date` (rendered as e.g.
	    'Mar 05, 2020').
	    """
	    s = 'once' if entry.num_episodes == 1 else '{} times'.format(entry.num_episodes)
	    d = entry.earliest_date.strftime('%b %d, %Y')
	    return '"{}" has been watched {}. You first saw it on {}.'.format(show, s, d)

	def format_summary_msg(num_shows, start_date, fav, num):
	    """Build the summary paragraph printed at the top of the report.

	    `num_shows` is the number of distinct shows, `start_date` the datetime of
	    the earliest viewing, `fav` the most-watched title and `num` its view
	    count. The returned string ends with a newline.
	    """
	    return 'You have watched {} shows since {}. Of these, you have watched {} most often ({} episodes).\n'.format(num_shows, start_date.strftime('%b %d, %Y'), fav, num)

	def print_and_write(out, s):
	    """Echo the string `s` to stdout and append it, newline-terminated, to `out`.

	    `out` only needs a write() method.
	    """
	    print(s)
	    out.write(s + '\n')

	# Script entry point: summarize the viewing-activity CSV named on the command
	# line, printing the report and mirroring it into netflix_shows.txt.
	if __name__ == '__main__':
	    if len(sys.argv) != 2 or sys.argv[1] in ['-h', '--help']:
	        print_usage(sys.argv[0])
	    else:
	        # Parse first, so a bad input file does not truncate an existing report.
	        entries = get_shows(sys.argv[1])
	        if not entries:
	            # max() and shows[-1] below would raise on an empty history.
	            print('No viewing activity found in {}'.format(sys.argv[1]))
	        else:
	            # Most recently discovered show first; the last element is therefore
	            # the show with the overall earliest viewing date.
	            shows = sorted(entries, key=lambda s: entries[s].earliest_date, reverse=True)

	            favorite = max(entries, key=lambda s: entries[s].num_episodes)
	            fav_count = entries[favorite].num_episodes
	            earliest_date = entries[shows[-1]].earliest_date

	            # The with-block guarantees the report is flushed and closed.
	            with open('netflix_shows.txt', 'w') as out:
	                print_and_write(out, format_summary_msg(len(shows), earliest_date, favorite, fav_count))
	                for show in shows:
	                    print_and_write(out, format_show_msg(show, entries[show]))