Skip to content

Instantly share code, notes, and snippets.

@jairopinilla
Last active May 31, 2020 19:27
Show Gist options
  • Save jairopinilla/994f274f0821a7fff059905ce63f1337 to your computer and use it in GitHub Desktop.
Save jairopinilla/994f274f0821a7fff059905ce63f1337 to your computer and use it in GitHub Desktop.
Code to get the IMDb genres of a movie from its title.
from imdb import IMDb
import re
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import requests
import time
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
from urllib.parse import quote_plus

# Look up every title in ListaContenido on IMDb, collect its genres, and write
# the result to contenidoClasificado.xlsx.
#
# ListaContenido is not defined in this part of the file; it is iterated with
# .iterrows() and the first column of each row is used as the movie title.
# For each title the script opens the IMDb search page in Chrome, clicks the
# first result, extracts the numeric IMDb id from the resulting URL and asks
# the IMDb client for that movie's genres.

SEARCH_URL = 'https://www.imdb.com/find?s=tt&q={}&ref_=nv_sr_sm'
FIRST_RESULT_XPATH = '//*[@id="main"]/div/div[2]/table/tbody/tr[1]/td[2]/a'
# Compiled once instead of on every iteration; captures the digits after "tt".
TITLE_ID_RE = re.compile(r'title/tt(.+?)/')
SIN_CLASIFICAR = 'SIN CLASIFICAR'

driver = webdriver.Chrome('chromedriver.exe')
ia = IMDb()
Contenidosclasificacion = []
try:
    for _, fila in ListaContenido.iterrows():
        # First column of the row holds the title (explicit positional access).
        nombreRecursorigin = fila.iloc[0]
        # Reset per title: otherwise a failure would either hit an unbound
        # name (first iteration) or report the previous title's IMDb id.
        found = None
        generos = SIN_CLASIFICAR
        try:
            # quote_plus turns spaces into '+' and also escapes characters such
            # as '&' or '#' that would otherwise corrupt the query string.
            driver.get(SEARCH_URL.format(quote_plus(nombreRecursorigin)))
            driver.find_element(By.XPATH, FIRST_RESULT_XPATH).click()
            m = TITLE_ID_RE.search(str(driver.current_url))
            if m:
                found = m.group(1)
                movie = ia.get_movie(found)
                # Each genre is prefixed with ',' (so the string starts with a
                # comma) to keep the output format of earlier runs unchanged.
                generos = ''.join(',' + genre for genre in movie['genres'])
            # If the URL carries no title id, the row is kept as unclassified
            # instead of being silently dropped from the output.
        except Exception as exc:
            # Best-effort scrape: any lookup failure marks the title as
            # unclassified and moves on. Ctrl-C is no longer swallowed.
            generos = SIN_CLASIFICAR
            print('lookup failed:', repr(exc), file=sys.stderr)
        # NOTE(review): the 'contenido:' key (with trailing colon) is kept as-is
        # because it becomes a column name in the exported spreadsheet.
        Contenidosclasificacion.append({'contenido:': nombreRecursorigin, 'generos': generos, 'idimdb': found})
        print(nombreRecursorigin, generos)
finally:
    # Always close the browser, even if the loop is interrupted.
    driver.quit()

dataClas = pd.DataFrame(Contenidosclasificacion)
dataClas.head()  # only useful as a preview in an interactive session
dataClas.to_excel("contenidoClasificado.xlsx")
# dataClas: table with the movie title, its genres and its IMDb id
@jairopinilla
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment