Skip to content

Instantly share code, notes, and snippets.

@amir16yp
Created November 29, 2023 19:42
Show Gist options
  • Save amir16yp/5e54e5a3f06ac5d47e0f67dc6d1f2c78 to your computer and use it in GitHub Desktop.
Save amir16yp/5e54e5a3f06ac5d47e0f67dc6d1f2c78 to your computer and use it in GitHub Desktop.
YTS scraper/downloader
#!/usr/bin/python3
import subprocess
from os import getcwd
from sys import argv
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
def ask_yes_no_question(question):
    """Prompt with *question* until the user gives a yes/no answer.

    Args:
        question: Text shown to the user; " (yes/no): " is appended to it.

    Returns:
        True for "yes" or "y", False for "no" or "n" (case-insensitive,
        surrounding whitespace ignored). Also False when stdin is closed
        before an answer is given, so a non-interactive run never opts in.
    """
    while True:
        try:
            user_response = input(question + " (yes/no): ").strip().lower()
        except EOFError:
            # No more input can arrive; treat the missing answer as a refusal
            # instead of crashing with a traceback.
            return False
        if user_response in ("yes", "y"):
            return True
        if user_response in ("no", "n"):
            return False
        print("Please enter 'yes' or 'no'.")
def download_torrent(torrent_url, download_directory, aria2c_path='aria2c'):
    """Download a torrent with the external ``aria2c`` program.

    Args:
        torrent_url: URL of the torrent file or magnet link.
        download_directory: Directory passed to aria2c's ``--dir`` option.
        aria2c_path: Name or path of the aria2c executable to run.

    Returns:
        None. Success or failure is reported on stdout; no exception is
        raised when aria2c fails or cannot be started.
    """
    # Arguments are passed as a list (no shell), so the URL is never
    # interpreted by a shell.
    command = [
        aria2c_path,
        '--dir', download_directory,
        torrent_url,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        # aria2c started but exited with a non-zero status.
        print(f"Error: {e}")
    except OSError as e:
        # aria2c is not installed, not on PATH, or not executable.
        print(f"Error: could not run '{aria2c_path}': {e}")
    else:
        print("Torrent download completed successfully!")
def get_movie_info(query, timeout=10):
    """Search the YTS browse page for *query* and collect the results.

    Args:
        query: Search term; it is percent-encoded before being placed in
            the URL path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each movie title (the poster image's alt text with
        its trailing " download" removed) to the href of that movie's page.
        Empty when nothing matched, the server answered with a non-200
        status, or the request failed; failures are printed, not raised.
    """
    # Encode every reserved character so input such as "a/b" or "what?"
    # cannot change the structure of the URL path.
    encoded_query = quote(query, safe="")
    url = f"https://yts.mx/browse-movies/{encoded_query}/all/all/0/latest/0/all"

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return {}

    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return {}

    soup = BeautifulSoup(response.text, 'html.parser')
    # Result cards are divs whose class starts with "browse-movie-wrap".
    movie_wraps = soup.find_all('div', class_=lambda x: x and x.startswith("browse-movie-wrap"))

    movie_info_dict = {}
    for movie_wrap in movie_wraps:
        movie_link = movie_wrap.find('a')
        figure = movie_wrap.find('figure')
        img = figure.find('img') if figure else None
        href = movie_link.get('href') if movie_link else None
        img_alt = img.get('alt') if img else None
        if not href or not img_alt:
            # Skip a malformed card instead of aborting the whole search.
            continue
        # The alt text ends in " download"; drop that suffix to get the title.
        title = img_alt.strip().rsplit(" download", 1)[0]
        movie_info_dict[title] = href
    return movie_info_dict
def get_movie_torrent_links(movie_link, timeout=10):
    """Fetch a movie page and list the torrent downloads it offers.

    Args:
        movie_link: URL of the movie's page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'torrent': href, 'quality': link_text}`` dicts, one per
        distinct download URL, in page order. Empty when no links were
        found, the server answered with a non-200 status, or the request
        failed; failures are printed, not raised.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(movie_link, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve data from {movie_link}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    anchors = soup.find_all('a', href=lambda x: x and x.startswith("https://yts.mx/torrent/download/"))

    torrent_info_list = []
    encountered_links = set()
    for anchor in anchors:
        href = anchor['href']
        # Trim surrounding whitespace so the "Download" filter and the
        # displayed quality label do not depend on the page's formatting.
        text = anchor.get_text().strip()
        # Links labelled just "Download" are excluded; the remaining links'
        # text is used as the quality label.
        if text == "Download" or href in encountered_links:
            continue
        encountered_links.add(href)
        torrent_info_list.append({'torrent': href, 'quality': text})
    return torrent_info_list
def _prompt_for_choice(prompt, count):
    """Ask until the user enters a whole number from 1 to *count*; return it."""
    while True:
        raw = input(prompt).strip()
        try:
            choice = int(raw)
        except ValueError:
            print(f"Please enter a number between 1 and {count}.")
            continue
        # Reject 0 and negatives explicitly: after subtracting 1 they would
        # index from the end of the list and silently pick the wrong item.
        if 1 <= choice <= count:
            return choice
        print(f"Please enter a number between 1 and {count}.")


def main():
    """Run the interactive search, pick, and download flow."""
    query = input("Enter a movie query: ")
    movies_info = get_movie_info(query)
    if not movies_info:
        print("No movies found.")
        return

    # Display a numbered list of movie titles.
    movies = list(movies_info.items())
    for i, (movie_title, _) in enumerate(movies, 1):
        print(f"{i} - {movie_title}")
    selection = _prompt_for_choice(
        "Enter the number of the movie you want to select: ", len(movies)
    )
    selected_movie = movies[selection - 1][1]

    torrent_links = get_movie_torrent_links(selected_movie)
    if not torrent_links:
        print("No torrent links found for the selected movie.")
        return

    # Display a numbered list of torrent qualities.
    for i, torrent_info in enumerate(torrent_links, 1):
        print(f"{i} - Quality: {torrent_info['quality']}")
    quality_selection = _prompt_for_choice(
        "Enter the number of the torrent quality you want to download: ",
        len(torrent_links),
    )
    selected_torrent = torrent_links[quality_selection - 1]
    print(f"Selected torrent quality: {selected_torrent['quality']}")
    print(f"Download link: {selected_torrent['torrent']}")

    if ask_yes_no_question("Download this torrent using aria2c?"):
        print("Downloading torrent using aria2c...")
        download_torrent(selected_torrent['torrent'], getcwd())


# Only run the interactive flow when executed as a script, so the helper
# functions can be imported without triggering prompts.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment