Skip to content

Instantly share code, notes, and snippets.

@Underdoge
Last active September 23, 2023 04:15
Show Gist options
  • Save Underdoge/20a849cc4c866d1b1aea325cdf823bfd to your computer and use it in GitHub Desktop.
Save Underdoge/20a849cc4c866d1b1aea325cdf823bfd to your computer and use it in GitHub Desktop.
Write IMDb ratings into m3u movie list
import getopt
import os
import re
import sys
import urllib.parse

import pandas as pd
import requests
import unidecode as ud
from bs4 import BeautifulSoup
def mainSearch(titleURL, headers):
    """Query IMDb's ``/find`` title search and collect the result links.

    Args:
        titleURL: Search text in which the caller has already replaced
            spaces with ``+``. Every other character that is special in a
            URL is percent-encoded here.
        headers: HTTP headers forwarded to ``requests.get``.

    Returns:
        ``(results, href, url)`` where ``results`` is a DataFrame holding
        the normalized link texts (``Name``) next to the link targets
        (``Href``), ``href`` is the plain list of link targets, and ``url``
        is the search URL that was fetched.
    """
    def normalize(text):
        # Reduce a link text to lower-case ASCII letters/digits so it can
        # be compared with a plain movie title.
        text = text.replace("&", "and")
        text = text.translate(str.maketrans("", "", "?!-':"))
        text = re.sub(r'[^a-zA-Z0-9\s]+', ' ', ud.unidecode(text))
        # The substitution above can leave runs of spaces behind; squeeze
        # them so "a, b" becomes "a b" rather than "a  b".
        return re.sub(r' {2,}', ' ', text).lower()

    # Without quoting, a "&" or "#" inside the title would end the q=
    # parameter early. "+" stays literal: it is the caller's space marker.
    query = urllib.parse.quote(titleURL, safe="+")
    url = ('https://www.imdb.com/find?q=' + query
           + "&s=tt&ttype=ft&ref_=fn_ft")
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): names come from the "__c" containers but targets from
    # the "__tc" containers; the DataFrame below needs both lists to have
    # the same length, so confirm both selectors hit the same anchors.
    titles = [normalize(a.text) for a in soup.select(
        'div.ipc-metadata-list-summary-item__c a')]
    href = [a.attrs.get('href') for a in soup.select(
        'div.ipc-metadata-list-summary-item__tc a')]
    return pd.DataFrame({'Name': titles, 'Href': href}), href, url
def altSearch(titleURL, headers):
    """Query IMDb's ``/search/title`` page and collect the result links.

    Args:
        titleURL: Search text in which the caller has already replaced
            spaces with ``+``. Every other character that is special in a
            URL is percent-encoded here.
        headers: HTTP headers forwarded to ``requests.get``.

    Returns:
        ``(results, href, url)`` where ``results`` is a DataFrame holding
        the normalized link texts (``Name``) next to the link targets
        (``Href``), ``href`` is the plain list of link targets, and ``url``
        is the search URL that was fetched.
    """
    def normalize(text):
        # Reduce a link text to lower-case ASCII letters/digits so it can
        # be compared with a plain movie title.
        text = text.replace("&", "and")
        text = text.translate(str.maketrans("", "", "?!-':"))
        text = re.sub(r'[^a-zA-Z0-9\s]+', ' ', ud.unidecode(text))
        # The substitution above can leave runs of spaces behind; squeeze
        # them so "a, b" becomes "a b" rather than "a  b".
        return re.sub(r' {2,}', ' ', text).lower()

    # Without quoting, a "&" or "#" inside the title would end the title=
    # parameter early. "+" stays literal: it is the caller's space marker.
    query = urllib.parse.quote(titleURL, safe="+")
    url = 'https://www.imdb.com/search/title/?title=' + query
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    # Names and targets are read from the same anchors, so one query of
    # the document serves both lists.
    anchors = soup.select('h3.lister-item-header a')
    titles = [normalize(a.text) for a in anchors]
    href = [a.attrs.get('href') for a in anchors]
    return pd.DataFrame({'Name': titles, 'Href': href}), href, url
def getIMDbRating(movietitle):
    """Return the IMDb rating of the movie called *movietitle*.

    The title is searched with mainSearch() first and with altSearch() when
    that yields no results or no matching name. A result counts as a hit
    only when its name equals the requested title, compared lower-cased
    both as given and after the same clean-up the search results receive.
    The first hit's page is then fetched and its aggregate rating read.

    Diagnostics are printed when the module-level ``debug`` flag is set.

    Args:
        movietitle: Movie title as plain text.

    Returns:
        The rating text (e.g. ``"7.8"``), or ``"N/A"`` when no matching
        title, no title link, or no numeric rating could be found.
    """
    def normalize(text):
        # Same clean-up the search helpers apply to result names, so that
        # a title containing punctuation or accents can still match.
        text = text.replace("&", "and")
        text = text.translate(str.maketrans("", "", "?!-':"))
        text = re.sub(r'[^a-zA-Z0-9\s]+', ' ', ud.unidecode(text))
        return re.sub(r' {2,}', ' ', text).lower()

    def findMatches(frame):
        # Column-wise comparison; also well defined for an empty frame.
        return frame[frame['Name'].isin(wanted)]

    headers = {
        'Accept-Language': 'en-US,en;q=0.5',
        # Product tokens of a User-Agent are separated by single spaces.
        'User-Agent': ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52"),
    }
    titleURL = movietitle.replace(" ", "+")
    # Accept the raw lower-cased title (the historical behaviour) as well
    # as its normalized form.
    wanted = {movietitle.lower(), normalize(movietitle)}

    results, href, url = mainSearch(titleURL, headers)
    usedAltSearch = False
    if len(results) == 0:
        results, href, url = altSearch(titleURL, headers)
        usedAltSearch = True
        if len(results) == 0:
            if debug:
                print(f"Search URL: {url}")
                print(f"Results (a): {results}")
            return "N/A"

    matches = findMatches(results)
    if matches.empty and not usedAltSearch:
        # Only fall back when the alternative search has not been asked
        # already; repeating the identical request cannot change the answer.
        results, href, url = altSearch(titleURL, headers)
        matches = findMatches(results)
    if matches.empty:
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (b): {results}")
        return "N/A"

    # The frame carries a default 0..n-1 index, so the label of the first
    # hit is also its position in the parallel ``href`` list.
    index = matches.index[0]
    link = href[index] if href else None
    if not link or not link.startswith("/title/"):
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (c): {results}")
        return "N/A"

    url = 'https://www.imdb.com' + link
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    rating = soup.select(
        "div[data-testid=hero-rating-bar__aggregate-rating__score] span")
    if not rating:
        if debug:
            print(f"Result URL: {url}")
            print(f"Results (d): {results}")
            print("* The following movie was found but has no rating: "
                  f"{rating}")
        return "N/A"

    ratingText = rating[0].text
    try:
        # Validation only: the text itself is returned, not the float.
        float(ratingText)
    except ValueError:
        if debug:
            print(f"URL: {url}")
            print(f"Rating not a float: {rating}")
        return "N/A"
    return ratingText
# ---------------------------------------------------------------------------
# Command-line entry point.
#
# Reads an m3u list (-f), looks up the IMDb rating of every "#EXTINF" entry
# that contains "Movie VOD", and writes a copy of the list with the rating
# inserted into the entry line. With -t a single title is looked up instead.
# ---------------------------------------------------------------------------

# Option state. ``debug`` is also read by getIMDbRating().
append = False
debug = False
count = 0            # number of distinct movies looked up so far
number = "all"
output = ""
showHelp = False
skipDuplicates = False
skip = 2
written = 0
m3uMovieList = ""
singleTitle = ""
options = "adf:hn:o:st:"
argumentList = sys.argv[1:]
long_options = ["Append", "Debug", "File=", "Help",
                "Number=", "Output=", "Skip", "Title="]
try:
    # Only the parsing itself can raise getopt.error.
    arguments, values = getopt.getopt(argumentList, options, long_options)
except getopt.error as err:
    print(str(err))
else:
    for currentArgument, currentValue in arguments:
        if currentArgument in ("-h", "--Help"):
            print("\n USAGE: ")
            print("\n python3 imdb_ratings.py <Options>")
            print("\n OPTIONS:")
            print("\n -h This help message")
            print(" -f (--File) Input m3u movie list (Required)")
            print(" -o (--Output) Output m3u movie list (Optional)")
            print(" -s (--Skip) Skip duplicate movies (Optional)")
            print(" -d (--Debug) Enable debugging mode (Optional)")
            print(" -n (--Number) Number of movies to look up (Optional "
                  "- Default: all)")
            print(" -a (--Append) Append rating to movie title (Optional "
                  "- Default: rating will precede the title)")
            print(" -t (--Title) Look up a single movie title by name")
            print("\n EXAMPLES: ")
            print("\n The following command will read the first 100 movies "
                  "of the \"movielist.m3u\" file and write them to\n the "
                  "\"newmovielist.m3u\" file appending the rating to the "
                  "movie title, skipping duplicate movie titles,\n and will "
                  "output debugging information:")
            # -a is a plain flag: a value after it would be taken as the
            # first non-option argument and end option parsing, silently
            # dropping the -s and -d that follow.
            print("\n python3 imdb_ratings.py -f movielist.m3u -o "
                  "newmovielist.m3u -n 100 -a -s -d")
            print("\n The following command will look up the rating for a "
                  "movie named \"Everything Everywhere All At Once\":")
            print("\n python3 imdb_ratings.py -t \"Everything Everywhere "
                  "All At Once\"\n")
            showHelp = True
        elif currentArgument in ("-f", "--File"):
            print("m3u List:", currentValue)
            m3uMovieList = currentValue
        elif currentArgument in ("-n", "--Number"):
            print("Number of movies:", currentValue)
            number = currentValue
        # getopt reports the long form under the name declared in
        # ``long_options``, i.e. "--Append".
        elif currentArgument in ("-a", "--Append"):
            print("Append rating enabled.")
            append = True
        elif currentArgument in ("-o", "--Output"):
            print(f"Output file: {currentValue}")
            output = currentValue
        elif currentArgument in ("-s", "--Skip"):
            print("Skip duplicates enabled.")
            skipDuplicates = True
        elif currentArgument in ("-d", "--Debug"):
            print("Debugging mode enabled.")
            debug = True
        elif currentArgument in ("-t", "--Title"):
            print("Look up single movie title: ", currentValue)
            singleTitle = currentValue

    if not showHelp:
        if number != "all":
            limit = int(number)
            movieNumber = limit - 1
            if limit == 1:
                skip = 5
        if m3uMovieList != "":
            if output != "":
                outputPath = output
            else:
                # Drop whatever extension the input has (".m3u", ".m3u8",
                # none) instead of blindly cutting four characters.
                outputPath = os.path.splitext(m3uMovieList)[0] + "_new.m3u"
            # Lower-cased title -> rating of every movie already looked up.
            ratingsByTitle = {}
            # True while the lines of a duplicate movie entry are being read.
            isDuplicate = False
            # Both files are closed (and the output flushed) on any exit.
            with open(m3uMovieList) as movieList, \
                    open(outputPath, "w") as newMovieList:
                for line in movieList:
                    # NOTE(review): ``written`` cycles 1..skip+1 and the
                    # -n limit is only tested on the line where it equals
                    # ``skip``; kept exactly as found because the intended
                    # line layout of the list is not visible here.
                    written += 1
                    if written <= skip:
                        if (number != "all" and count > movieNumber
                                and written >= skip):
                            break
                    else:
                        written = 0
                    if line.startswith("#EXTINF") and "Movie VOD" in line:
                        title = line[line.rindex(": ") + 2:-6]
                        key = title.lower()
                        isDuplicate = key in ratingsByTitle
                        if not isDuplicate:
                            rating = getIMDbRating(title)
                            print(f"{count+1}) Title: \"{title}\", "
                                  f"Rating: {rating}")
                            ratingsByTitle[key] = rating
                            count += 1
                        else:
                            rating = ratingsByTitle[key]
                            if skipDuplicates:
                                print(f"* Skipping duplicate: \"{title}\", "
                                      f"Rating: {rating}")
                            else:
                                print(f"* Duplicate: \"{title}\", "
                                      f"Rating: {rating}")
                        if append:
                            # Insert before the trailing newline.
                            newline = (line[:-1] + f" ({rating})"
                                       + line[-1:])
                        else:
                            # Insert in front of the last "HD : " marker.
                            marker = line.rindex("HD : ")
                            newline = (line[:marker] + f"{rating} "
                                       + line[marker:])
                        if not (skipDuplicates and isDuplicate):
                            newMovieList.write(newline)
                    else:
                        if line.startswith("#EXTINF"):
                            channel = line[line.rindex(",") + 1:].replace(
                                "\n", "")
                            print(f"Adding channel: \"{channel}\"")
                            # A new non-movie entry starts here; it must not
                            # inherit the duplicate state of the movie
                            # before it, or it would be announced but never
                            # written.
                            isDuplicate = False
                        # Remaining lines of a skipped duplicate (its URL)
                        # are dropped together with its #EXTINF line.
                        if not (skipDuplicates and isDuplicate):
                            newMovieList.write(line)
        else:
            if singleTitle != "":
                rating = getIMDbRating(singleTitle)
                print(f"Rating: {rating}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment