Skip to content

Instantly share code, notes, and snippets.

@gopigof
Created June 8, 2019 19:05
Show Gist options
  • Save gopigof/8a9dee0d0abf9e10732e0ea9524817cb to your computer and use it in GitHub Desktop.
Save gopigof/8a9dee0d0abf9e10732e0ea9524817cb to your computer and use it in GitHub Desktop.
YIFY Movies has been one of the leading websites for torrenting movies. Their magnets and trackers have been reliable for a very long time. Here is a scraper that collects the movies you are interested in.
import requests
import bs4
# Root prepended to relative ('/...') movie-page links found on a listing page.
# NOTE(review): main() crawls www.yify-movies.net while this points at the
# .com host -- confirm which one is intended.
base_url = 'http://www.yify-movies.com'
def parse_link(soup_object):
    """Collect details for every movie card found on a listing page.

    Every ``<div>`` whose class list contains ``c6`` is treated as one movie
    card.  The page the card links to is downloaded and scraped for the
    magnet link, the IMDb link, the genres and each ``<b>`` label together
    with the node that follows it.

    Args:
        soup_object: Parsed listing page.  Only ``find_all('div')`` is
            called on it.

    Returns:
        A dict mapping each movie title to a dict holding ``'Name'``,
        ``'Magnet Link'``, ``'IMDB Link'`` and one entry per ``<b>`` label
        found on the movie page.  Empty when the page has no ``c6`` cards.

    Raises:
        requests.RequestException: If a movie page cannot be fetched or
            does not answer within the timeout.
        AttributeError, IndexError: If a card or movie page lacks the
            elements looked up below.
    """
    movies = {}
    for card in soup_object.find_all('div'):
        # Tag.get() returns None for a <div> without a class attribute;
        # treat that as "no classes" instead of failing the membership test.
        if 'c6' not in (card.get('class') or ()):
            continue

        name = card('h2')[0].get_text()
        detail_url = card('a')[1].get('href')
        if detail_url.startswith('/'):
            detail_url = base_url + detail_url

        # requests never times out by default, so one stalled server would
        # hang the whole crawl.
        response = requests.get(detail_url, timeout=30)
        detail_soup = bs4.BeautifulSoup(response.text, 'lxml')
        anchors = detail_soup.find_all('a')

        magnet_link = (
            detail_soup.find('div', {'class': 'wish-list'}).find('a').get('href')
        )
        genre = [
            anchor.get_text()
            for anchor in anchors
            if str(anchor.get('href')).startswith('/genre/')
        ]
        # NOTE(review): relies on the IMDb link being the last anchor on the
        # page -- confirm against the live markup.
        imdb_link = anchors[-1].get('href')

        # Mutable [label, value] pairs: the first value is swapped for the
        # genre list below, which is impossible with tuples.
        other = [[bold.text, bold.next_sibling] for bold in detail_soup.find_all('b')]
        if other:
            other[0][1] = genre

        details = {'Name': name, 'Magnet Link': magnet_link, 'IMDB Link': imdb_link}
        # Applied last so that, as before, a <b> label overrides a fixed key
        # of the same name.
        details.update(other)
        movies[name] = details
    return movies
def main():
    """Crawl the 1080p search listing and print what was scraped.

    Fetches ``page_limit`` listing pages using the ``'seed/'`` crawl option
    (presumably "sort by seeders" -- confirm against the site), passes each
    parsed page to ``parse_link`` and prints the list of per-page results.

    Raises:
        requests.RequestException: If a listing page cannot be fetched or
            does not answer within the timeout.
    """
    page_limit = 1
    crawl_option = ['', 'seed/', 'peer/', 'az/', 'za/']  # Time is default if unmentioned
    url = 'http://www.yify-movies.net/search/1080p/'
    listing_url = url + crawl_option[1]
    movie_list = []
    # Count pages from 1: the first page is the bare listing URL and only
    # later pages carry their number.  Starting at 0 asked for a page "0".
    for page in range(1, page_limit + 1):
        print('Reached')
        page_url = listing_url if page == 1 else listing_url + str(page)
        # requests never times out by default; bound the wait explicitly.
        response = requests.get(page_url, timeout=30)
        soup_object = bs4.BeautifulSoup(response.text, 'lxml')
        movie_list.append(parse_link(soup_object))
    print(movie_list)
# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment