Skip to content

Instantly share code, notes, and snippets.

@TheWhatis
Created February 28, 2022 06:51
Show Gist options
  • Save TheWhatis/9b85eebf4d187211c5ddf1c81cb59742 to your computer and use it in GitHub Desktop.
# Import
import re
import os
import sys
import json
import js2py
import random
import requests
import tldextract
# From
from time import sleep
from translate import Translator
from bs4 import BeautifulSoup as bs
from user_agent import generate_navigator as get_uagent
# Browser-like HTTP headers sent with every video-page request so the
# site serves the normal desktop HTML (User-Agent is freshly generated
# per run by the user_agent package).
headers = {
"Host": "rt.pornhub.com",
"User-Agent": get_uagent()['user_agent'], #"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
"Referer": "https://duckduckgo.com/",
"DNT": "1",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "cross-site",
"Sec-GPC": "1",
"Cache-Control": "max-age=0",
"TE": "trailers"
}
# Proxy mapping passed straight to requests.get(proxies=...); a falsy
# value disables proxying (requests substitutes an empty mapping).
proxies = False
def get_domain(url):
    """Extract the domain parts of *url* with tldextract.

    Parameters:
        url: a full URL, possibly carrying a query string.

    Returns:
        A tldextract ExtractResult (subdomain / domain / suffix) for the
        first http(s) URL found in *url*, or None when *url* does not
        contain one.  (Previously a non-URL input raised IndexError.)
    """
    # Drop the query string before matching the URL prefix.
    base = url.split("?")[0]
    # Raw string: the original non-raw literal used \( \) escapes, which
    # emit invalid-escape warnings on modern Python.
    matches = re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        base)
    if not matches:
        return None
    return tldextract.extract(matches[0])
def get_video_urls(page = 1):
    """Collect absolute video-page URLs from one listing page.

    Parameters:
        page: 1-based listing page number; pages above 1 are requested
            via the "/video?page=N" query.

    Returns:
        A list of absolute video URLs on success, or False when the
        listing page did not answer with HTTP 200.
    """
    domain = "https://rt.pornhub.com"
    url = domain
    if page > 1:
        url = url + "/video?page=" + str(page)
    # Consistency fix: send the same browser-like headers/proxies that
    # get_video() uses (the request previously went out bare).
    response = requests.get(url, headers=headers, proxies=proxies)
    if response.status_code != 200:
        return False
    soup = bs(response.text, 'html.parser')
    anchors = soup.select(".sectionWrapper li .wrap .title a")
    return [domain + item.get("href").strip() for item in anchors]
def get_video(url, allow_quality = False):
    """Download the first matching stream of one video page via ffmpeg.

    Parameters:
        url: absolute URL of a single video page.
        allow_quality: False -> ['720']; 'all' -> every known quality;
            a list -> used as-is (falls back to ['720'] when it contains
            none of the known qualities); any other string is kept and
            matched by substring.

    Returns:
        True when ffmpeg exited with status 0, False on any failure
        (page without flashvars, file already downloaded, ffmpeg error).
    """
    response = requests.get(url=url, headers=headers, proxies=proxies)
    soup = bs(response.text, 'html.parser')
    scripts = soup.select("#player script")
    name_title = soup.select("h1.title .inlineFree")

    # --- Normalize the requested quality set -------------------------
    if allow_quality:
        allows = ['240', '480', '720', '1080']
        if isinstance(allow_quality, str):
            if allow_quality == 'all':
                allow_quality = allows
        elif isinstance(allow_quality, list):
            add_default = True
            for quality in allows:
                if quality in allow_quality:
                    add_default = False
                    break
            # FIX: this fallback was computed but never applied before —
            # a list with no recognized quality silently matched nothing.
            if add_default:
                allow_quality = ['720']
        else:
            allow_quality = ['720']
    else:
        allow_quality = ['720']

    # --- Grab the player script and a filesystem-safe title ----------
    script = ""
    for item in scripts:
        script = item.text
        break
    exclude_chars = ['[', ']', '(', ')', "'", '"', "\\", "/", "|", "&", "*", "%", "$"]
    name = ""  # FIX: was unbound (NameError later) when the selector matched nothing
    for item in name_title:
        # name = Translator(to_lang="Russian").translate(item.text.strip())
        name = item.text
        for char in exclude_chars:
            name = name.replace(char, "")
        name = name.strip()
        break

    # --- Locate the flashvars variable name inside the JS ------------
    var_media = None
    for line in script.split("\n"):
        match = re.match(r'.*var flashvars.* =', line)
        if match:
            var_media = match.group(0).replace("var ", '').replace("=", "").replace("{", "").strip()
            break
    if not var_media:
        # FIX: previously var_media stayed None and the string
        # concatenation below raised TypeError.
        return False

    # Evaluate the page's own JS to obtain the media definition list.
    script = ("function get_elem(){\n var playerObjList = {};\n" + script + "\n"
              + "return(" + var_media + "['mediaDefinitions']);\n}")
    result = js2py.eval_js(script)
    videos = result().to_list()

    if not os.path.exists('./videos'):
        os.mkdir("./videos")
    if not os.path.exists("./logs_downloads"):
        os.mkdir("./logs_downloads")

    # --- Download the first stream matching the requested quality ----
    for video in videos:
        if 'get_media' not in video['videoUrl'] and isinstance(video['quality'], str):
            if video['quality'] not in allow_quality:
                continue
            print("Downloading video with name '" + name + "' and quality '" + video['quality'] + "'")
            video['quality'] = video['quality'].strip()
            path_video = './videos/' + video['quality'].strip()
            if not os.path.exists(path_video):
                os.mkdir(path_video)
            path_video = path_video + "/" + name + ".mp4"
            if os.path.exists(path_video):
                return False
            # NOTE(review): the command is built from page-derived strings;
            # subprocess.run([...], shell=False) would be safer than
            # os.system with a shell string (name is only partially sanitized).
            downloaded = os.system('ffmpeg -i "' + video['videoUrl'] + '" -c copy -bsf:a aac_adtstoasc "' + path_video + '" 2> ./logs_downloads/' + name.replace(" ", "").replace(".", "") + '_downloads.log')
            print(downloaded)
            return downloaded == 0
def get_arg(param, onlyvalue = False):
    """Look up ``--param`` in sys.argv.

    Parameters:
        param: option name without the leading dashes.
        onlyvalue: when True return just the value; otherwise return a
            {'key': param, 'value': ...} dict.

    Returns:
        False when the flag is absent; the stripped token following the
        flag when one exists; True (as the value) when the flag is the
        final argv token.
    """
    args = sys.argv
    flag = '--' + param
    found = False
    expect_value = False
    for idx in range(1, len(args)):
        token = args[idx]
        if expect_value:
            # Token right after the flag is its value.
            value = token.strip()
            found = value if onlyvalue else {'key': param, 'value': value}
            expect_value = False
        if token == flag:
            expect_value = True
            # Flag given as the very last argument: treat as boolean True.
            if idx == len(args) - 1:
                found = True if onlyvalue else {'key': param, 'value': True}
    return found
if __name__ == '__main__':
    # User-facing help text (grammar fixed from the original wording).
    help_string = """
--limit - limit how many videos to download (example: download_pornhub --limit 10)
--start-page - start from the given listing page (example: download_pornhub --start-page 5)
--quality - choose video quality (example: download_pornhub --quality 'all' or --quality '240, 720')
"""
    # Parse command-line options.  get_arg returns False when a flag is
    # absent, and int(False) == 0 keeps the "not set" cases falsy.
    limit = int(get_arg('limit', True))
    start_page = int(get_arg('start-page', True))
    quality = get_arg('quality', True)
    help_v = get_arg('help', True)
    if help_v:
        print(help_string)
    else:
        if not limit and not start_page and not quality:
            print("If you want to close, click Ctrl-c")
            print("Print main.py --help for help")
        # Normalize --quality into what get_video() expects: 'all' stays
        # a string, 'a, b' becomes a list, one value becomes a one-item
        # list.  FIX: a bare --quality flag yields True; guarding on str
        # avoids the TypeError that "',' in True" used to raise.
        if isinstance(quality, str):
            if ',' in quality:
                quality = [qual.strip() for qual in quality.split(",")]
            elif not quality == 'all':
                quality = [quality]
        if start_page:
            page = start_page - 1
        else:
            page = 0
        # Main crawl loop: walk listing pages until a page is missing or
        # the download limit is reached.
        x = 0  # count of successful downloads
        break_while = False
        while True:
            page = page + 1
            if break_while:
                break
            urls_video = get_video_urls(page)
            if urls_video:
                # FIX: len(urls_video) was taken BEFORE this truthiness
                # check and crashed with TypeError when get_video_urls
                # returned False; iterating directly also removes the
                # index bookkeeping.
                for url in urls_video:
                    x = x + 1
                    video = get_video(url, quality)
                    if not video:
                        print("Downloaded!")
                        x = x - 1
                    if limit:
                        if limit == x:
                            break_while = True
                            break
            else:
                # FIX: str(page) — the int was concatenated to a str and
                # raised TypeError on this path.
                print("Page '" + str(page) + "' not found")
                break_while = True
                break
backports.zoneinfo==0.2.1
beautifulsoup4==4.10.0
bs4==0.0.1
certifi==2021.10.8
charset-normalizer==2.0.12
click==8.0.4
filelock==3.6.0
idna==3.3
iso8601==1.0.2
Js2Py==0.71
libretranslatepy==2.1.1
lxml==4.8.0
pycryptodome==3.14.1
pyee==9.0.4
pyjsparser==2.7.1
pytz-deprecation-shim==0.1.0.post0
requests==2.27.1
requests-file==1.5.1
six==1.16.0
soupsieve==2.3.1
tldextract==3.2.0
translate==3.6.1
typing-extensions==4.1.1
tzdata==2021.5
tzlocal==4.1
urllib3==1.26.8
user-agent==0.1.10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment