Created
June 8, 2019 19:05
-
-
Save gopigof/8a9dee0d0abf9e10732e0ea9524817cb to your computer and use it in GitHub Desktop.
YIFY Movies has been one of the leading websites for torrenting movies. Their magnets and trackers have been reliable for a very long time. Here is a scraper that scrapes the movies you are interested in.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import bs4 | |
# Site root that parse_link prepends to relative ('/...') movie-page links.
base_url = 'http://www.yify-movies.com'
def parse_link(soup_object):
    """Collect details for every movie card on a parsed listing page.

    Every ``div`` whose class list contains ``c6`` is treated as one movie
    card. The card's second anchor is followed (relative links are prefixed
    with ``base_url``), the linked page is downloaded and parsed, and the
    magnet link, genres, last anchor's href (stored as the IMDB link) and
    each ``<b>`` label with its following sibling are recorded.

    Args:
        soup_object: Parsed listing page; must support ``find_all('div')``.

    Returns:
        dict: Maps each movie name (text of the card's first ``h2``) to a
        dict of its details. Cards sharing a name overwrite one another.

    Raises:
        Exceptions from ``requests`` (including timeouts) and the
        ``IndexError``/``AttributeError`` raised when a card or movie page
        lacks the expected elements are not caught here.
    """
    movies = {}

    # ``get('class')`` returns None for a div without a class attribute;
    # fall back to an empty tuple so the membership test cannot raise.
    cards = [
        div for div in soup_object.find_all('div')
        if 'c6' in (div.get('class') or ())
    ]

    for card in cards:
        name = card('h2')[0].get_text()
        details = {'Name': name}
        movies[name] = details

        detail_url = card('a')[1].get('href')
        if detail_url.startswith('/'):
            detail_url = base_url + detail_url

        # A timeout keeps one unresponsive page from hanging the whole run.
        response = requests.get(detail_url, timeout=30)
        page = bs4.BeautifulSoup(response.text, 'lxml')

        wish_list = page.find('div', {'class': 'wish-list'})
        magnet_link = wish_list.find('a').get('href')
        genre = [
            anchor.get_text()
            for anchor in page.find_all('a')
            if str(anchor.get('href')).startswith('/genre/')
        ]
        imdb_link = page.find_all('a')[-1].get('href')

        # Mutable [label, value] pairs: the first pair's value is replaced
        # by the genre list, which is impossible with tuples.
        other = [[bold.text, bold.next_sibling] for bold in page.find_all('b')]
        if other:
            other[0][1] = genre

        details.update({'Magnet Link': magnet_link, 'IMDB Link': imdb_link})
        for label, value in other:
            details[label] = value

    return movies
def main():
    """Scrape the 1080p search listing page by page and print the results.

    Downloads ``page_limit`` listing pages using the ``seed/`` ordering,
    passes each parsed page to ``parse_link`` and prints the list of
    per-page result dicts once all pages have been processed.
    """
    page_limit = 1
    crawl_option = ['', 'seed/', 'peer/', 'az/', 'za/']  # Time is default if unmentioned
    url = 'http://www.yify-movies.net/search/1080p/'
    listing_url = url + crawl_option[1]
    movie_list = []

    # NOTE(review): assumes the site numbers pages from 1, as the special
    # case for page 1 suggests. Starting at 0 requested '.../seed/0' first.
    for page_number in range(1, page_limit + 1):
        print('Reached')
        if page_number != 1:
            page_url = listing_url + str(page_number)
        else:
            # The first page is addressed without a page-number suffix.
            page_url = listing_url
        # A timeout keeps an unresponsive server from hanging the run.
        response = requests.get(page_url, timeout=30)
        soup_object = bs4.BeautifulSoup(response.text, 'lxml')
        movie_list.append(parse_link(soup_object))

    print(movie_list)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment