Skip to content

Instantly share code, notes, and snippets.

@crosstyan
Created June 21, 2020 06:26
Show Gist options
  • Save crosstyan/a2a0163b33c539c98497629d5fb1d059 to your computer and use it in GitHub Desktop.
Save crosstyan/a2a0163b33c539c98497629d5fb1d059 to your computer and use it in GitHub Desktop.
Use the "http://v.ranks.xin/" API to resolve video-page URLs and download the videos.
from typing import List
import requests
#import json
import re
import os
import asyncio
from contextlib import closing
# Folder (relative to this script) where downloaded videos are written.
DOWNLOAD_FOLDER="download_videos"
# Text file next to this script listing one page URL per line.
URL_LIST_FILENAME="url_list.txt"
# Absolute directory containing this script; anchors all relative paths.
basedir = os.path.abspath(os.path.dirname(__file__))
def is_downloadable(url):
    """Return True if *url* appears to point at a downloadable file.

    Issues a HEAD request (following redirects) and inspects the
    Content-Type header: anything served as text or HTML is assumed to
    be a web page rather than a file.

    :param url: URL to probe.
    :return: True when the Content-Type is neither text nor HTML.
    """
    head_response = requests.head(url, allow_redirects=True)
    # Default to '' — some servers omit Content-Type entirely, and the
    # original code crashed with AttributeError on None here.
    content_type = head_response.headers.get('content-type', '')
    content_type = content_type.lower()
    if 'text' in content_type:
        return False
    if 'html' in content_type:
        return False
    return True
def download_file(url):
    """Stream *url* into DOWNLOAD_FOLDER and return the local filename.

    The filename is derived from the URL's last path segment. The target
    folder is created on demand (the original crashed with
    FileNotFoundError when it did not exist).

    :param url: direct URL of the file to download.
    :return: the filename the content was saved under.
    :raises requests.HTTPError: if the server responds with an error status.
    """
    filename = url2filename(url)
    target_dir = os.path.join(basedir, DOWNLOAD_FOLDER)
    os.makedirs(target_dir, exist_ok=True)  # ensure download folder exists
    # NOTE the stream=True parameter: avoids loading the whole body into memory.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(os.path.join(target_dir, filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                # If you have a chunk-encoded response, set chunk_size=None
                # and guard with `if chunk:` before writing.
                f.write(chunk)
    return filename
def open_url(filename):
    """Read *filename* and return its lines (newlines preserved).

    :param filename: path of a UTF-8 text file, one URL per line.
    :return: list of raw lines, each still ending in its newline.
    """
    # Mode 'r' — the original used 'r+' (read-write) although the file is
    # only read, which fails on read-only files for no benefit.
    with open(filename, 'r', encoding='UTF-8') as input_file:
        return input_file.readlines()
def get_download_url_list(url_list: List[str]) -> List[str]:
    """Resolve each page URL through the v.ranks.xin parse API.

    Queries the API once per (stripped) URL and collects the first
    direct video URL for every entry the API parsed successfully
    (``code == 0``); unparsable entries are silently skipped.

    :param url_list: raw page URLs, possibly with surrounding whitespace.
    :return: direct video download URLs, in input order.
    """
    resolved = []
    for raw_url in url_list:
        page_url = raw_url.strip()
        response = requests.get('http://v.ranks.xin/video-parse.php',
                                params={'url': page_url})
        payload = response.json()
        if payload["code"] == 0:
            resolved.append(payload['data'][0]['url'])
    return resolved
def url2filename(url: str) -> str:
    """Extract the filename (last path segment) from *url*.

    Query strings and fragments are excluded, e.g.
    ``http://h/a/b.mp4?t=1`` -> ``b.mp4``.

    Bug fix: the original searched the global ``download_url`` instead of
    the ``url`` parameter, so it only worked by accident inside the main
    loop and raised NameError anywhere else.

    :param url: URL containing at least one '/' before the filename.
    :return: the matched filename.
    :raises AttributeError: if the URL has no extractable filename
        (re.search returns None and .group fails).
    """
    # Lookbehind anchors on the last '/', the body stops at '?' or '#'.
    return re.search(r'(?<=\/)[^\/\?#]+(?=[^\/]*$)', url).group(0)
if __name__ == "__main__":
    # Read page URLs from url_list.txt next to this script, resolve them
    # through the parse API, then download every resolvable video and
    # print the saved filename.
    url_list_path = os.path.join(basedir, URL_LIST_FILENAME)
    page_urls = open_url(url_list_path)
    for download_url in get_download_url_list(page_urls):
        if not is_downloadable(download_url):
            continue
        filename = url2filename(download_url)
        download_file(download_url)
        print(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment