Skip to content

Instantly share code, notes, and snippets.

@robgon-art
Created August 23, 2020 16:41
Show Gist options
  • Save robgon-art/ee9209c95d3c0b55d9ff018142cbbb07 to your computer and use it in GitHub Desktop.
Save robgon-art/ee9209c95d3c0b55d9ff018142cbbb07 to your computer and use it in GitHub Desktop.
Compile a list of titles
# Function to remove articles at the head of titles
def remove_leading_article(title):
    """Strip a single leading article ("The " or "A ") from a title.

    Only one article is removed, so "The A Team" becomes "A Team" rather
    than "Team". The match is case-sensitive and includes the trailing
    space, so "Theory" and a bare "A" are returned unchanged.

    Args:
        title: The title string to normalize.

    Returns:
        The title without its leading article, or the title unchanged if
        it does not start with one.
    """
    for article in ("The ", "A "):
        if title.startswith(article):
            # Return immediately so a second article is never stripped.
            return title[len(article):]
    return title
# Get the titles of books, movies, and TV shows
import csv


def _add_titles(titles, path, column, delimiter=',', drop_parenthetical=False):
    """Read one delimited file and record its normalized titles in `titles`.

    Args:
        titles: Dict used as a set; each normalized title is stored as a
            key with the value True.
        path: Path of the delimited text file to read.
        column: Zero-based index of the field holding the title.
        delimiter: Field separator passed to csv.reader.
        drop_parenthetical: When True, discard everything from the first
            "(" onward and strip surrounding whitespace before normalizing.
    """
    # newline='' is what the csv module asks for so that it can handle line
    # endings inside fields itself. The explicit encoding keeps decoding
    # independent of the platform default.
    # NOTE(review): assumes every input file is UTF-8, as the first one was
    # already declared to be — confirm for the other two.
    with open(path, newline='', encoding='utf-8') as f:
        # NOTE(review): default csv quoting is kept as in the original. If
        # the tab-separated files contain bare double quotes, rows may be
        # merged; quoting=csv.QUOTE_NONE may be needed — confirm on the data.
        for row in csv.reader(f, delimiter=delimiter):
            if len(row) <= column:
                # Blank or truncated row: no title field to read.
                continue
            title = row[column]
            if drop_parenthetical:
                title = title.split('(')[0].strip()  # Remove everything after a left paren
            title = remove_leading_article(title)
            titles[title.lower()] = True


titles = {}
_add_titles(titles, 'booksummaries/booksummaries.txt', 2, delimiter='\t')
_add_titles(titles, 'title.basics.tsv', 3, delimiter='\t')
_add_titles(titles, 'books.csv', 1, drop_parenthetical=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment