Skip to content

Instantly share code, notes, and snippets.

@TheWhatis
Created February 28, 2022 06:51
Show Gist options
  • Save TheWhatis/9b85eebf4d187211c5ddf1c81cb59742 to your computer and use it in GitHub Desktop.
# Import
import re
import os
import sys
import json
import js2py
import random
import requests
import tldextract
# From
from time import sleep
from translate import Translator
from bs4 import BeautifulSoup as bs
from user_agent import generate_navigator as get_uagent
# Browser-like HTTP headers sent with every video-page request so the
# site serves the normal desktop HTML (User-Agent is freshly generated
# per run by the user_agent package).
headers = {
"Host": "rt.pornhub.com",
"User-Agent": get_uagent()['user_agent'], #"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
"Referer": "https://duckduckgo.com/",
"DNT": "1",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "cross-site",
"Sec-GPC": "1",
"Cache-Control": "max-age=0",
"TE": "trailers"
}
# Proxy mapping passed straight to requests.get(proxies=...); a falsy
# value disables proxying (requests substitutes an empty mapping).
proxies = False
def get_domain(url):
    """Extract the domain parts of *url* with tldextract.

    Parameters:
        url: a full URL, possibly carrying a query string.

    Returns:
        A tldextract ExtractResult (subdomain / domain / suffix) for the
        first http(s) URL found in *url*, or None when *url* does not
        contain one.  (Previously a non-URL input raised IndexError.)
    """
    # Drop the query string before matching the URL prefix.
    base = url.split("?")[0]
    # Raw string: the original non-raw literal used \( \) escapes, which
    # emit invalid-escape warnings on modern Python.
    matches = re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        base)
    if not matches:
        return None
    return tldextract.extract(matches[0])
def get_video_urls(page = 1):
    """Collect absolute video-page URLs from one listing page.

    Parameters:
        page: 1-based listing page number; pages above 1 are requested
            via the "/video?page=N" query.

    Returns:
        A list of absolute video URLs on success, or False when the
        listing page did not answer with HTTP 200.
    """
    domain = "https://rt.pornhub.com"
    url = domain
    if page > 1:
        url = url + "/video?page=" + str(page)
    # Consistency fix: send the same browser-like headers/proxies that
    # get_video() uses (the request previously went out bare).
    response = requests.get(url, headers=headers, proxies=proxies)
    if response.status_code != 200:
        return False
    soup = bs(response.text, 'html.parser')
    anchors = soup.select(".sectionWrapper li .wrap .title a")
    return [domain + item.get("href").strip() for item in anchors]
def get_video(url, allow_quality = False):
    """Download the first matching stream of one video page via ffmpeg.

    Parameters:
        url: absolute URL of a single video page.
        allow_quality: False -> ['720']; 'all' -> every known quality;
            a list -> used as-is (falls back to ['720'] when it contains
            none of the known qualities); any other string is kept and
            matched by substring.

    Returns:
        True when ffmpeg exited with status 0, False on any failure
        (page without flashvars, file already downloaded, ffmpeg error).
    """
    response = requests.get(url=url, headers=headers, proxies=proxies)
    soup = bs(response.text, 'html.parser')
    scripts = soup.select("#player script")
    name_title = soup.select("h1.title .inlineFree")

    # --- Normalize the requested quality set -------------------------
    if allow_quality:
        allows = ['240', '480', '720', '1080']
        if isinstance(allow_quality, str):
            if allow_quality == 'all':
                allow_quality = allows
        elif isinstance(allow_quality, list):
            add_default = True
            for quality in allows:
                if quality in allow_quality:
                    add_default = False
                    break
            # FIX: this fallback was computed but never applied before —
            # a list with no recognized quality silently matched nothing.
            if add_default:
                allow_quality = ['720']
        else:
            allow_quality = ['720']
    else:
        allow_quality = ['720']

    # --- Grab the player script and a filesystem-safe title ----------
    script = ""
    for item in scripts:
        script = item.text
        break
    exclude_chars = ['[', ']', '(', ')', "'", '"', "\\", "/", "|", "&", "*", "%", "$"]
    name = ""  # FIX: was unbound (NameError later) when the selector matched nothing
    for item in name_title:
        # name = Translator(to_lang="Russian").translate(item.text.strip())
        name = item.text
        for char in exclude_chars:
            name = name.replace(char, "")
        name = name.strip()
        break

    # --- Locate the flashvars variable name inside the JS ------------
    var_media = None
    for line in script.split("\n"):
        match = re.match(r'.*var flashvars.* =', line)
        if match:
            var_media = match.group(0).replace("var ", '').replace("=", "").replace("{", "").strip()
            break
    if not var_media:
        # FIX: previously var_media stayed None and the string
        # concatenation below raised TypeError.
        return False

    # Evaluate the page's own JS to obtain the media definition list.
    script = ("function get_elem(){\n var playerObjList = {};\n" + script + "\n"
              + "return(" + var_media + "['mediaDefinitions']);\n}")
    result = js2py.eval_js(script)
    videos = result().to_list()

    if not os.path.exists('./videos'):
        os.mkdir("./videos")
    if not os.path.exists("./logs_downloads"):
        os.mkdir("./logs_downloads")

    # --- Download the first stream matching the requested quality ----
    for video in videos:
        if 'get_media' not in video['videoUrl'] and isinstance(video['quality'], str):
            if video['quality'] not in allow_quality:
                continue
            print("Downloading video with name '" + name + "' and quality '" + video['quality'] + "'")
            video['quality'] = video['quality'].strip()
            path_video = './videos/' + video['quality'].strip()
            if not os.path.exists(path_video):
                os.mkdir(path_video)
            path_video = path_video + "/" + name + ".mp4"
            if os.path.exists(path_video):
                return False
            # NOTE(review): the command is built from page-derived strings;
            # subprocess.run([...], shell=False) would be safer than
            # os.system with a shell string (name is only partially sanitized).
            downloaded = os.system('ffmpeg -i "' + video['videoUrl'] + '" -c copy -bsf:a aac_adtstoasc "' + path_video + '" 2> ./logs_downloads/' + name.replace(" ", "").replace(".", "") + '_downloads.log')
            print(downloaded)
            return downloaded == 0
def get_arg(param, onlyvalue = False):
    """Look up ``--param`` in sys.argv.

    Parameters:
        param: option name without the leading dashes.
        onlyvalue: when True return just the value; otherwise return a
            {'key': param, 'value': ...} dict.

    Returns:
        False when the flag is absent; the stripped token following the
        flag when one exists; True (as the value) when the flag is the
        final argv token.
    """
    args = sys.argv
    flag = '--' + param
    found = False
    expect_value = False
    for idx in range(1, len(args)):
        token = args[idx]
        if expect_value:
            # Token right after the flag is its value.
            value = token.strip()
            found = value if onlyvalue else {'key': param, 'value': value}
            expect_value = False
        if token == flag:
            expect_value = True
            # Flag given as the very last argument: treat as boolean True.
            if idx == len(args) - 1:
                found = True if onlyvalue else {'key': param, 'value': True}
    return found
if __name__ == '__main__':
    # User-facing help text (grammar fixed from the original wording).
    help_string = """
--limit - limit how many videos to download (example: download_pornhub --limit 10)
--start-page - start from the given listing page (example: download_pornhub --start-page 5)
--quality - choose video quality (example: download_pornhub --quality 'all' or --quality '240, 720')
"""
    # Parse command-line options.  get_arg returns False when a flag is
    # absent, and int(False) == 0 keeps the "not set" cases falsy.
    limit = int(get_arg('limit', True))
    start_page = int(get_arg('start-page', True))
    quality = get_arg('quality', True)
    help_v = get_arg('help', True)
    if help_v:
        print(help_string)
    else:
        if not limit and not start_page and not quality:
            print("If you want to close, click Ctrl-c")
            print("Print main.py --help for help")
        # Normalize --quality into what get_video() expects: 'all' stays
        # a string, 'a, b' becomes a list, one value becomes a one-item
        # list.  FIX: a bare --quality flag yields True; guarding on str
        # avoids the TypeError that "',' in True" used to raise.
        if isinstance(quality, str):
            if ',' in quality:
                quality = [qual.strip() for qual in quality.split(",")]
            elif not quality == 'all':
                quality = [quality]
        if start_page:
            page = start_page - 1
        else:
            page = 0
        # Main crawl loop: walk listing pages until a page is missing or
        # the download limit is reached.
        x = 0  # count of successful downloads
        break_while = False
        while True:
            page = page + 1
            if break_while:
                break
            urls_video = get_video_urls(page)
            if urls_video:
                # FIX: len(urls_video) was taken BEFORE this truthiness
                # check and crashed with TypeError when get_video_urls
                # returned False; iterating directly also removes the
                # index bookkeeping.
                for url in urls_video:
                    x = x + 1
                    video = get_video(url, quality)
                    if not video:
                        print("Downloaded!")
                        x = x - 1
                    if limit:
                        if limit == x:
                            break_while = True
                            break
            else:
                # FIX: str(page) — the int was concatenated to a str and
                # raised TypeError on this path.
                print("Page '" + str(page) + "' not found")
                break_while = True
                break
backports.zoneinfo==0.2.1
beautifulsoup4==4.10.0
bs4==0.0.1
certifi==2021.10.8
charset-normalizer==2.0.12
click==8.0.4
filelock==3.6.0
idna==3.3
iso8601==1.0.2
Js2Py==0.71
libretranslatepy==2.1.1
lxml==4.8.0
pycryptodome==3.14.1
pyee==9.0.4
pyjsparser==2.7.1
pytz-deprecation-shim==0.1.0.post0
requests==2.27.1
requests-file==1.5.1
six==1.16.0
soupsieve==2.3.1
tldextract==3.2.0
translate==3.6.1
typing-extensions==4.1.1
tzdata==2021.5
tzlocal==4.1
urllib3==1.26.8
user-agent==0.1.10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment