Skip to content

Instantly share code, notes, and snippets.

@s-4-m-a-n
Last active May 8, 2020 00:22
Show Gist options
  • Save s-4-m-a-n/1fe9d4ac53cccfd7338deaccf85abd46 to your computer and use it in GitHub Desktop.
Save s-4-m-a-n/1fe9d4ac53cccfd7338deaccf85abd46 to your computer and use it in GitHub Desktop.
an unofficial subscene-API
from bs4 import BeautifulSoup
import requests
import os
import re
class Subscene:
def __init__(self,**kwargs):
self.args = kwargs
self.__header = {
"user-agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
}
self.__setup()
def __str__(self):
return dicToStr(**self.args,URL=self.Domain)
def __repr__(self):
return ("{}".format(self.args))
def __getHTML(self,url):
self.__htmlResponse = requests.get(url).text
return BeautifulSoup(self.__htmlResponse,'lxml')
def __postHTML(self,url):
self.__htmlResponse =requests.post(url,data={'query':self.args['title']},headers=self.__header).text
return BeautifulSoup(self.__htmlResponse,'lxml')
def __setup(self):
self.Domain = "https://www.subscene.com" #home page
self.__pageSearch = "/subtitles/searchbytitle" # query page
self.__soupSearch = self.__postHTML(self.Domain+self.__pageSearch) #subtitles
self.__linkToSub = self.__searchBestResult(self.__soupSearch)
if (self.__linkToSub == "not-found"):
raise Exception("\nerror : !!!!!!Result not found!!!!\n make sure you have entered correct 'title' and 'year'")
self.__subtitleResponse = self.__getHTML(self.Domain+self.__linkToSub)
self.__getSubList(self.__subtitleResponse,self.args['language'])
self.__getZIPlinks() # array of absolute(full) download url
def __searchBestResult(self,soupSearch):
searchList = soupSearch.select(".title a")
searchedMovie = self.__Normalization(self.args['title']+self.args['year'])
for item in searchList:
availableMovie = self.__Normalization(item.text)
if availableMovie == searchedMovie:
return (item.attrs['href'])
return "not-found"
def __Normalization(self,string):
return (re.sub("\s|\(|\)|-|\.","",(string).lower()))
def __getSubList(self,soup,lang):
link = list( x.attrs["href"] for x in soup.select(".a1 a"))
movieTitle = list(x.text for x in soup.select(".a1 span:last-child"))
pattern = "[\r\n\t]"
movieTitle = list(re.sub(pattern,"",movieName) for movieName in movieTitle)
subtitles = list(zip(movieTitle,link))
self.__requiredSubtitleList = list((x,v) for x,v in subtitles if lang.lower() in v ) # default english subtitles tuples
#[('www.TamilRockers.ws - Extraction (2020)[HDRip - [Tamil + Telugu] - XviD - MP3 - 700MB - ESubs] ',
# '/subtitles/extraction-2020/english/2198327'),
# ('Official Trailer - Extraction (2020) ',
# '/subtitles/extraction-2020/english/2183629')]
def __getZIPlinks(self): #zip file download link
self.ZIPlinks = []
self.__totalAvailable = len(self.__requiredSubtitleList)
self.__searchLimit =self.__totalAvailable if (str(self.args['limit'])).lower() == 'all' else int(self.args['limit'])
for i in range(min(self.__totalAvailable,self.__searchLimit)):
soup = self.__getHTML(self.Domain+self.__requiredSubtitleList[i][1])
self.ZIPlinks.append((self.__requiredSubtitleList[i][0],self.Domain+soup.select_one("#downloadButton").attrs["href"])) #list of tuple of (name,zipURL)
#(movie name + zip download full link)
def ZIPfile(self):
files = []
for i in range(len(self.ZIPlinks)):
files.append(requests.get(self.ZIPlinks[i][1])) #request for zip file / zip file download
return files # returns zip file as a response
def showZIP(self):
print(self.ZIPlinks) # display movie title with zip download link in list of tuple
def downloadZIP(self,path=os.path.abspath(os.path.curdir)):
self.ZIPfile()
normPath = os.path.normpath(path)
if(os.path.isdir(normPath)):
for i in range(min(self.__totalAvailable,self.__searchLimit)):
fileName = self.ZIPlinks[i][0]+".zip"
with open(os.path.join(os.path.normpath(path),fileName),"wb") as file:
ZIPfileList = self.ZIPfile()
file.write(ZIPfileList[0].content)
else:
try:
raise Exception("legal path is required / path to a dir ")
except Exceptiton as e:
print("ERROR:"+e)
#----------- functions----------------------------------------------
def dicToStr(**kwargs): # returing string of the dictionary type
string=''
for item,value in kwargs.items():
string +=" "+item+" : "+str(value)+",\n"
string = "{}\n{} {}".format('{',string,'}')
return string
#------------ returns instance of the subscene object ---------------
#------------ main function to be called----------------------------
def search(title=None,year ='',language="English",domain="subscene",limit='all'): # limit specifies nos. of subtitle to be fetched
try:
if title == None or year == None:
raise Exception("'title' and 'year' cannot be of None type ")
except ValueError as e :
print("value Error :" + repr(e))
args = {'title':title,'year':year,'language':language,'domain':domain,'limit':limit}
return Subscene(**args)
# if __name__ == "__main__":
# obj = search(title="extraction",year="2015",language="Arabic",limit=2)
# obj.ZIPfile()
# print(obj.ZIPlinks)
# print(dir(obj))
# print(obj.Domain)
# print(obj)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment