# Underdoge/imdb_ratings.py

## imdb_ratings.py
from bs4 import BeautifulSoup
import re
import requests
import pandas as pd
import getopt
import sys
import unidecode as ud


def mainSearch(titleURL, headers):
    """Search IMDb's "find" page for feature films matching a query.

    Args:
        titleURL: Query text already prepared for use in a URL; it is
            concatenated into the address as-is.
        headers: HTTP headers sent with the request.

    Returns:
        A 3-tuple ``(results, href, url)``: a DataFrame with ``Name``
        (simplified, lower-cased link text) and ``Href`` columns, the plain
        list of link targets, and the search URL that was requested.
    """
    url = ('https://www.imdb.com/find?q=' + titleURL
           + "&s=tt&ttype=ft&ref_=fn_ft")
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.text, "html.parser")

    # One-pass equivalent of the character clean-up: "&" becomes "and",
    # and ? ! - ' : are dropped.
    cleanup = str.maketrans({"&": "and", "?": None, "!": None,
                             "-": None, "'": None, ":": None})

    titles = []
    for anchor in soup.select('div.ipc-metadata-list-summary-item__c a'):
        name = ud.unidecode(anchor.text.translate(cleanup))
        # Any remaining run of non-alphanumeric, non-space characters
        # becomes a single space.
        name = re.sub(r'[^a-zA-Z0-9\s]+', ' ', name)
        titles.append(name.replace("  ", " ").lower())

    # NOTE(review): names and links come from two different selectors;
    # pandas raises ValueError below if they yield different counts.
    href = []
    for anchor in soup.select('div.ipc-metadata-list-summary-item__tc a'):
        href.append(anchor.attrs.get('href'))

    return pd.DataFrame({'Name': titles, 'Href': href}), href, url


def altSearch(titleURL, headers):
    """Search IMDb's advanced title search as an alternative lookup.

    Args:
        titleURL: Query text already prepared for use in a URL; it is
            concatenated into the address as-is.
        headers: HTTP headers sent with the request.

    Returns:
        A 3-tuple ``(results, href, url)``: a DataFrame with ``Name``
        (simplified, lower-cased link text) and ``Href`` columns, the plain
        list of link targets, and the search URL that was requested.
    """
    url = 'https://www.imdb.com/search/title/?title=' + titleURL
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.text, "html.parser")

    # Names and links both come from the same header anchors.
    anchors = soup.select('h3.lister-item-header a')

    # One-pass equivalent of the character clean-up: "&" becomes "and",
    # and ? ! - ' : are dropped.
    cleanup = str.maketrans({"&": "and", "?": None, "!": None,
                             "-": None, "'": None, ":": None})

    titles = []
    for anchor in anchors:
        name = ud.unidecode(anchor.text.translate(cleanup))
        # Any remaining run of non-alphanumeric, non-space characters
        # becomes a single space.
        name = re.sub(r'[^a-zA-Z0-9\s]+', ' ', name)
        titles.append(name.replace("  ", " ").lower())

    href = [anchor.attrs.get('href') for anchor in anchors]

    return pd.DataFrame({'Name': titles, 'Href': href}), href, url


def getIMDbRating(movietitle):
    """Return the IMDb rating for a movie title, or ``"N/A"``.

    The title is looked up with ``mainSearch`` first and ``altSearch`` as a
    fallback. The first result whose simplified name equals the lower-cased
    title is followed, and the rating is read from that title page.

    Args:
        movietitle: Title to look up.

    Returns:
        The rating text as found on the title page, or ``"N/A"`` when the
        title is blank, no result matches, the match does not link to a
        ``/title/`` page, or the page shows no numeric rating.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    # A blank title cannot identify a movie; skip the network round trips.
    if not movietitle or not movietitle.strip():
        return "N/A"

    headers = {
        'Accept-Language': 'en-US,en;q=0.5',
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/107.0.0.0 Safari/537.36 Edg/"
                      "107.0.1418.52"}

    # quote_plus turns spaces into "+" and also escapes characters such as
    # "&", "#" or "+" that would otherwise corrupt the query string.
    titleURL = quote_plus(movietitle)

    # NOTE(review): result names are stripped of punctuation and accents by
    # the search helpers, but the title is only lower-cased here, so titles
    # containing punctuation never match -- confirm whether that is intended.
    wanted = movietitle.lower()

    results, href, url = mainSearch(titleURL, headers)
    usedAltSearch = False
    if len(results) <= 0:
        results, href, url = altSearch(titleURL, headers)
        usedAltSearch = True
        if len(results) <= 0:
            if debug:
                print(f"Search URL: {url}")
                print(f"Results (a): {results}")
            return "N/A"

    matches = results[results["Name"] == wanted]
    if len(matches) <= 0 and not usedAltSearch:
        # Only fall back when the alternative search has not already been
        # tried; repeating it would send the identical request again.
        results, href, url = altSearch(titleURL, headers)
        matches = results[results["Name"] == wanted]
    if len(matches) <= 0:
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (b): {results}")
        return "N/A"

    index = matches.index[0]
    link = href[index] if index < len(href) else None
    # Anchors without an href yield None; treat them like any non-title link.
    if not isinstance(link, str) or not link.startswith("/title/"):
        if debug:
            print(f"Search URL: {url}")
            print(f"Results (c): {results}")
        return "N/A"

    url = 'https://www.imdb.com' + link
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    rating = soup.select(
        "div[data-testid=hero-rating-bar__aggregate-rating__score] span")

    if len(rating) <= 0:
        if debug:
            print(f"Result URL: {url}")
            print(f"Results (d): {results}")
            print("*  The following movie was found but has no rating: "
                  f"{rating}")
        return "N/A"

    score = rating[0].text
    try:
        # Only accept text that parses as a number.
        float(score)
    except ValueError:
        if debug:
            print(f"URL: {url}")
            print(f"Rating not a float: {rating}")
        return "N/A"
    return score


# --- Command-line driver ---------------------------------------------------
# Parses the options below, then either annotates every "Movie VOD" entry of
# an m3u playlist with its IMDb rating (-f) or looks up a single title (-t).
# NOTE: this code runs at import time, and getIMDbRating() reads the
# module-level `debug` flag set here.
append = False           # -a: add the rating at the end of the entry line
debug = False            # -d: print diagnostics from getIMDbRating()
found = []               # rows of allMovies matching the current title
count = 0                # number of distinct titles looked up so far
number = "all"           # -n: how many titles to look up
output = ""              # -o: output playlist path
help = False             # -h was given; skip all processing
skipDuplicates = False   # -s: do not write repeated titles
skip = 2                 # period of the `written` counter (see loop below)
written = 0
m3uMovieList = ""        # -f: input playlist path
singleTitle = ""         # -t: single title to look up
options = "adf:hn:o:st:"
argumentList = sys.argv[1:]
long_options = ["Append", "Debug", "File=", "Help",
                "Number=", "Output=", "Skip", "Title="]

try:
    # Parsing argument
    arguments, values = getopt.getopt(argumentList, options, long_options)

    # checking each argument
    for currentArgument, currentValue in arguments:
        if currentArgument in ("-h", "--Help"):
            print("\n  USAGE: ")
            print("\n      python3 imdb_ratings.py <Options>")
            print("\n  OPTIONS:")
            print("\n    -h              This help message")
            print("    -f (--File)     Input m3u movie list (Required)")
            print("    -o (--Output)   Output m3u movie list (Optional)")
            print("    -s (--Skip)     Skip duplicate movies (Optional)")
            print("    -d (--Debug)    Enable debugging mode (Optional)")
            print("    -n (--Number)   Number of movies to look up (Optional"
                  " - Default: all)")
            print("    -a (--Append)   Append rating to movie title (Optional"
                  " - Default: rating will precede the title)")
            print("    -t (--Title)    Look up a single movie title by name")
            print("\n  EXAMPLES: ")
            print("\n      The following command will read the first 100"
                  " movies of the \"movielist.m3u\" file and write them to"
                  "\n    the \"newmovielist.m3u\" file appending the rating"
                  " to the movie tile, skipping duplicate movie"
                  " titles,\n    and will output debugging information:")
            # -a is a flag without a value; a stray value would make getopt
            # stop parsing and ignore the options that follow it.
            print("\n        python3 imdb_ratings.py -f movielist.m3u -o"
                  " newmovielist.m3u -n 100 -a -s -d")
            print("\n      The following command will look up the rating for"
                  " a movie named \"Everything Everywhere All At Once\":")
            print("\n        python3 imdb_ratings.py -t \"Everything"
                  " Everywhere All At Once\"\n")
            help = True

        elif currentArgument in ("-f", "--File"):
            print("m3u List:", currentValue)
            m3uMovieList = currentValue

        elif currentArgument in ("-n", "--Number"):
            print("Number of movies:", currentValue)
            number = currentValue

        # getopt reports the long form as "--Append" (see long_options).
        elif currentArgument in ("-a", "--Append"):
            print("Append rating enabled.")
            append = True

        elif currentArgument in ("-o", "--Output"):
            print(f"Output file: {currentValue}")
            output = currentValue

        elif currentArgument in ("-s", "--Skip"):
            print("Skip duplicates enabled.")
            skipDuplicates = True

        elif currentArgument in ("-d", "--Debug"):
            print("Debugging mode enabled.")
            debug = True

        elif currentArgument in ("-t", "--Title"):
            print("Look up single movie title: ", currentValue)
            singleTitle = currentValue

    if not help:
        if number != "all":
            try:
                requested = int(number)
            except ValueError:
                # Report through the same path as other bad options instead
                # of ending with a traceback.
                raise getopt.GetoptError(
                    f"option -n requires an integer: {number}", "n") from None
            movieNumber = requested - 1
            if requested == 1:
                skip = 5

        allMovies = pd.DataFrame({'Name': [], 'Rating': []})
        if m3uMovieList != "":
            if output != "":
                outputPath = output
            else:
                outputPath = m3uMovieList[:-4:] + "_new.m3u"
            # The input is opened first so a missing input file does not
            # leave an empty output behind; both files are closed on exit.
            with open(m3uMovieList) as movieList:
                with open(outputPath, "w") as newMovieList:
                    for line in movieList:
                        # `written` cycles 1..skip and is reset on the next
                        # line; the -n limit is only tested when it equals
                        # `skip`.
                        # NOTE(review): presumably this stops on an entry
                        # boundary -- confirm against the playlist layout.
                        written += 1
                        if written <= skip:
                            if (number != "all" and count > movieNumber
                                    and written >= skip):
                                break
                        else:
                            written = 0
                        if line.startswith("#EXTINF") and "Movie VOD" in line:
                            # Title is the text after the last ": ", minus
                            # the final six characters of the line.
                            title = line[line.rindex(": ")+2:-6:]
                            found = allMovies[
                                allMovies.apply(
                                    lambda row:
                                    row.Name.lower() == title.lower(),
                                    axis=1)]
                            if len(found) == 0:
                                rating = getIMDbRating(title)
                                print(f"{count+1}) Title: \"{title}\","
                                      f" Rating: {rating}")
                                allMovies.loc[len(allMovies.index)] = [
                                    title, rating]
                                count += 1
                            else:
                                # Reuse the rating from the earlier lookup.
                                rating = found['Rating'].to_string(
                                    index=False)
                                if skipDuplicates:
                                    print("*  Skipping duplicate: "
                                          f"\"{title}\", Rating: {rating}")
                                else:
                                    print(f"*  Duplicate: \"{title}\","
                                          f" Rating: {rating}")
                            if append:
                                # Insert before the trailing newline.
                                newline = (line[0:-1:] + f" ({rating})"
                                           + line[-1::])
                            else:
                                # Insert just before the last "HD : ".
                                marker = line.rindex("HD : ")
                                newline = (line[0:marker:] + f"{rating} "
                                           + line[marker::])
                            if not skipDuplicates or len(found) == 0:
                                newMovieList.write(newline)
                        else:
                            if line.startswith("#EXTINF"):
                                channel = line[line.rindex(",")+1::].replace(
                                    "\n", "")
                                print(f"Adding channel: \"{channel}\"")
                            # `found` still refers to the most recent movie
                            # entry, so lines following a skipped duplicate
                            # are skipped too.
                            if not skipDuplicates or len(found) == 0:
                                newMovieList.write(line)
        elif singleTitle != "":
            rating = getIMDbRating(singleTitle)
            print(f"Rating: {rating}")

except getopt.error as err:
    print(str(err))
	from bs4 import BeautifulSoup
	import re
	import requests
	import pandas as pd
	import getopt
	import sys
	import unidecode as ud


	def mainSearch(titleURL, headers):
	    """Search IMDb's "find" page for feature films matching a query.

	    Args:
	        titleURL: Query text already prepared for use in a URL; it is
	            concatenated into the address as-is.
	        headers: HTTP headers sent with the request.

	    Returns:
	        A 3-tuple ``(results, href, url)``: a DataFrame with ``Name``
	        (simplified, lower-cased link text) and ``Href`` columns, the
	        plain list of link targets, and the search URL that was requested.
	    """
	    # Adjacent literals instead of a backslash continuation, so no
	    # indentation whitespace can end up inside the URL.
	    url = ('https://www.imdb.com/find?q=' + titleURL
	           + "&s=tt&ttype=ft&ref_=fn_ft")
	    page = requests.get(url, headers=headers)
	    soup = BeautifulSoup(page.text, "html.parser")

	    # "&" becomes "and"; ? ! - ' : are dropped.
	    cleanup = str.maketrans({"&": "and", "?": None, "!": None,
	                             "-": None, "'": None, ":": None})

	    titles = []
	    for anchor in soup.select('div.ipc-metadata-list-summary-item__c a'):
	        name = ud.unidecode(anchor.text.translate(cleanup))
	        # Any remaining run of non-alphanumeric, non-space characters
	        # becomes a single space.
	        name = re.sub(r'[^a-zA-Z0-9\s]+', ' ', name)
	        # Collapse a double space into one.
	        titles.append(name.replace("  ", " ").lower())

	    # NOTE(review): names and links come from two different selectors;
	    # pandas raises ValueError below if they yield different counts.
	    href = []
	    for anchor in soup.select('div.ipc-metadata-list-summary-item__tc a'):
	        href.append(anchor.attrs.get('href'))

	    return pd.DataFrame({'Name': titles, 'Href': href}), href, url


	def altSearch(titleURL, headers):
	    """Search IMDb's advanced title search as an alternative lookup.

	    Args:
	        titleURL: Query text already prepared for use in a URL; it is
	            concatenated into the address as-is.
	        headers: HTTP headers sent with the request.

	    Returns:
	        A 3-tuple ``(results, href, url)``: a DataFrame with ``Name``
	        (simplified, lower-cased link text) and ``Href`` columns, the
	        plain list of link targets, and the search URL that was requested.
	    """
	    url = 'https://www.imdb.com/search/title/?title=' + titleURL
	    page = requests.get(url, headers=headers)
	    soup = BeautifulSoup(page.text, "html.parser")

	    # Names and links both come from the same header anchors.
	    anchors = soup.select('h3.lister-item-header a')

	    # "&" becomes "and"; ? ! - ' : are dropped.
	    cleanup = str.maketrans({"&": "and", "?": None, "!": None,
	                             "-": None, "'": None, ":": None})

	    titles = []
	    for anchor in anchors:
	        name = ud.unidecode(anchor.text.translate(cleanup))
	        # Any remaining run of non-alphanumeric, non-space characters
	        # becomes a single space.
	        name = re.sub(r'[^a-zA-Z0-9\s]+', ' ', name)
	        # Collapse a double space into one.
	        titles.append(name.replace("  ", " ").lower())

	    href = [anchor.attrs.get('href') for anchor in anchors]

	    return pd.DataFrame({'Name': titles, 'Href': href}), href, url


	def getIMDbRating(movietitle):
	    """Return the IMDb rating for a movie title, or ``"N/A"``.

	    The title is looked up with ``mainSearch`` first and ``altSearch`` as
	    a fallback. The first result whose simplified name equals the
	    lower-cased title is followed, and the rating is read from that page.

	    Args:
	        movietitle: Title to look up.

	    Returns:
	        The rating text as found on the title page, or ``"N/A"`` when the
	        title is blank, no result matches, the match does not link to a
	        ``/title/`` page, or the page shows no numeric rating.
	    """
	    # Local import keeps this function self-contained.
	    from urllib.parse import quote_plus

	    # A blank title cannot identify a movie; skip the network round trips.
	    if not movietitle or not movietitle.strip():
	        return "N/A"

	    # Adjacent literals instead of backslash continuations, so no
	    # indentation whitespace can end up inside the header value.
	    headers = {
	        'Accept-Language': 'en-US,en;q=0.5',
	        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
	                      "AppleWebKit/537.36 (KHTML, like Gecko) "
	                      "Chrome/107.0.0.0 Safari/537.36 Edg/"
	                      "107.0.1418.52"}

	    # quote_plus turns spaces into "+" and also escapes characters such as
	    # "&", "#" or "+" that would otherwise corrupt the query string.
	    titleURL = quote_plus(movietitle)

	    # NOTE(review): result names are stripped of punctuation and accents
	    # by the search helpers, but the title is only lower-cased here, so
	    # titles containing punctuation never match -- confirm the intent.
	    wanted = movietitle.lower()

	    results, href, url = mainSearch(titleURL, headers)
	    usedAltSearch = False
	    if len(results) <= 0:
	        results, href, url = altSearch(titleURL, headers)
	        usedAltSearch = True
	        if len(results) <= 0:
	            if debug:
	                print(f"Search URL: {url}")
	                print(f"Results (a): {results}")
	            return "N/A"

	    matches = results[results["Name"] == wanted]
	    if len(matches) <= 0 and not usedAltSearch:
	        # Only fall back when the alternative search has not already been
	        # tried; repeating it would send the identical request again.
	        results, href, url = altSearch(titleURL, headers)
	        matches = results[results["Name"] == wanted]
	    if len(matches) <= 0:
	        if debug:
	            print(f"Search URL: {url}")
	            print(f"Results (b): {results}")
	        return "N/A"

	    index = matches.index[0]
	    link = href[index] if index < len(href) else None
	    # Anchors without an href yield None; treat them like any other
	    # non-title link.
	    if not isinstance(link, str) or not link.startswith("/title/"):
	        if debug:
	            print(f"Search URL: {url}")
	            print(f"Results (c): {results}")
	        return "N/A"

	    url = 'https://www.imdb.com' + link
	    response = requests.get(url, headers=headers)
	    soup = BeautifulSoup(response.text, "html.parser")
	    rating = soup.select(
	        "div[data-testid=hero-rating-bar__aggregate-rating__score] span")

	    if len(rating) <= 0:
	        if debug:
	            print(f"Result URL: {url}")
	            print(f"Results (d): {results}")
	            print("*  The following movie was found but has no rating: "
	                  f"{rating}")
	        return "N/A"

	    score = rating[0].text
	    try:
	        # Only accept text that parses as a number.
	        float(score)
	    except ValueError:
	        if debug:
	            print(f"URL: {url}")
	            print(f"Rating not a float: {rating}")
	        return "N/A"
	    return score


	# --- Command-line driver -------------------------------------------------
	# Parses the options below, then either annotates every "Movie VOD" entry
	# of an m3u playlist with its IMDb rating (-f) or looks up one title (-t).
	# NOTE: getIMDbRating() reads the module-level `debug` flag set here.
	append = False           # -a: add the rating at the end of the entry line
	debug = False            # -d: print diagnostics from getIMDbRating()
	found = []               # rows of allMovies matching the current title
	count = 0                # number of distinct titles looked up so far
	number = "all"           # -n: how many titles to look up
	output = ""              # -o: output playlist path
	help = False             # -h was given; skip all processing
	skipDuplicates = False   # -s: do not write repeated titles
	skip = 2                 # period of the `written` counter (see loop below)
	written = 0
	m3uMovieList = ""        # -f: input playlist path
	singleTitle = ""         # -t: single title to look up
	options = "adf:hn:o:st:"
	argumentList = sys.argv[1:]
	long_options = ["Append", "Debug", "File=", "Help",
	                "Number=", "Output=", "Skip", "Title="]

	try:
	    # Parsing argument
	    arguments, values = getopt.getopt(argumentList, options, long_options)

	    # checking each argument
	    for currentArgument, currentValue in arguments:
	        if currentArgument in ("-h", "--Help"):
	            print("\n  USAGE: ")
	            print("\n      python3 imdb_ratings.py <Options>")
	            print("\n  OPTIONS:")
	            print("\n    -h              This help message")
	            print("    -f (--File)     Input m3u movie list (Required)")
	            print("    -o (--Output)   Output m3u movie list (Optional)")
	            print("    -s (--Skip)     Skip duplicate movies (Optional)")
	            print("    -d (--Debug)    Enable debugging mode (Optional)")
	            print("    -n (--Number)   Number of movies to look up (Optional"
	                  " - Default: all)")
	            print("    -a (--Append)   Append rating to movie title (Optional"
	                  " - Default: rating will precede the title)")
	            print("    -t (--Title)    Look up a single movie title by name")
	            print("\n  EXAMPLES: ")
	            print("\n      The following command will read the first 100"
	                  " movies of the \"movielist.m3u\" file and write them to"
	                  "\n    the \"newmovielist.m3u\" file appending the rating"
	                  " to the movie tile, skipping duplicate movie"
	                  " titles,\n    and will output debugging information:")
	            # -a is a flag without a value; a stray value would make getopt
	            # stop parsing and ignore the options that follow it.
	            print("\n        python3 imdb_ratings.py -f movielist.m3u -o"
	                  " newmovielist.m3u -n 100 -a -s -d")
	            print("\n      The following command will look up the rating for"
	                  " a movie named \"Everything Everywhere All At Once\":")
	            print("\n        python3 imdb_ratings.py -t \"Everything"
	                  " Everywhere All At Once\"\n")
	            help = True

	        elif currentArgument in ("-f", "--File"):
	            print("m3u List:", currentValue)
	            m3uMovieList = currentValue

	        elif currentArgument in ("-n", "--Number"):
	            print("Number of movies:", currentValue)
	            number = currentValue

	        # getopt reports the long form as "--Append" (see long_options).
	        elif currentArgument in ("-a", "--Append"):
	            print("Append rating enabled.")
	            append = True

	        elif currentArgument in ("-o", "--Output"):
	            print(f"Output file: {currentValue}")
	            output = currentValue

	        elif currentArgument in ("-s", "--Skip"):
	            print("Skip duplicates enabled.")
	            skipDuplicates = True

	        elif currentArgument in ("-d", "--Debug"):
	            print("Debugging mode enabled.")
	            debug = True

	        elif currentArgument in ("-t", "--Title"):
	            print("Look up single movie title: ", currentValue)
	            singleTitle = currentValue

	    if not help:
	        if number != "all":
	            try:
	                requested = int(number)
	            except ValueError:
	                # Report through the same path as other bad options
	                # instead of ending with a traceback.
	                raise getopt.GetoptError(
	                    f"option -n requires an integer: {number}",
	                    "n") from None
	            movieNumber = requested - 1
	            if requested == 1:
	                skip = 5

	        allMovies = pd.DataFrame({'Name': [], 'Rating': []})
	        if m3uMovieList != "":
	            if output != "":
	                outputPath = output
	            else:
	                outputPath = m3uMovieList[:-4:] + "_new.m3u"
	            # The input is opened first so a missing input file does not
	            # leave an empty output behind; both files are closed on exit.
	            with open(m3uMovieList) as movieList:
	                with open(outputPath, "w") as newMovieList:
	                    for line in movieList:
	                        # `written` cycles 1..skip and is reset on the
	                        # next line; the -n limit is only tested when it
	                        # equals `skip`.
	                        # NOTE(review): presumably this stops on an entry
	                        # boundary -- confirm against the playlist layout.
	                        written += 1
	                        if written <= skip:
	                            if (number != "all" and count > movieNumber
	                                    and written >= skip):
	                                break
	                        else:
	                            written = 0
	                        if (line.startswith("#EXTINF")
	                                and "Movie VOD" in line):
	                            # Title is the text after the last ": ", minus
	                            # the final six characters of the line.
	                            title = line[line.rindex(": ")+2:-6:]
	                            found = allMovies[
	                                allMovies.apply(
	                                    lambda row:
	                                    row.Name.lower() == title.lower(),
	                                    axis=1)]
	                            if len(found) == 0:
	                                rating = getIMDbRating(title)
	                                print(f"{count+1}) Title: \"{title}\","
	                                      f" Rating: {rating}")
	                                allMovies.loc[len(allMovies.index)] = [
	                                    title, rating]
	                                count += 1
	                            else:
	                                # Reuse the rating from the earlier lookup.
	                                rating = found['Rating'].to_string(
	                                    index=False)
	                                if skipDuplicates:
	                                    print("*  Skipping duplicate: "
	                                          f"\"{title}\", Rating: {rating}")
	                                else:
	                                    print(f"*  Duplicate: \"{title}\","
	                                          f" Rating: {rating}")
	                            if append:
	                                # Insert before the trailing newline.
	                                newline = (line[0:-1:] + f" ({rating})"
	                                           + line[-1::])
	                            else:
	                                # Insert just before the last "HD : ".
	                                marker = line.rindex("HD : ")
	                                newline = (line[0:marker:] + f"{rating} "
	                                           + line[marker::])
	                            if not skipDuplicates or len(found) == 0:
	                                newMovieList.write(newline)
	                        else:
	                            if line.startswith("#EXTINF"):
	                                channel = line[
	                                    line.rindex(",")+1::].replace("\n", "")
	                                print(f"Adding channel: \"{channel}\"")
	                            # `found` still refers to the most recent movie
	                            # entry, so lines following a skipped duplicate
	                            # are skipped too.
	                            if not skipDuplicates or len(found) == 0:
	                                newMovieList.write(line)
	        elif singleTitle != "":
	            rating = getIMDbRating(singleTitle)
	            print(f"Rating: {rating}")

	except getopt.error as err:
	    print(str(err))