Created
June 21, 2020 06:26
-
-
Save crosstyan/a2a0163b33c539c98497629d5fb1d059 to your computer and use it in GitHub Desktop.
Use the "http://v.ranks.xin/" API to download videos.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
import requests | |
#import json | |
import re | |
import os | |
import asyncio | |
from contextlib import closing | |
DOWNLOAD_FOLDER="download_videos" | |
URL_LIST_FILENAME="url_list.txt" | |
basedir = os.path.abspath(os.path.dirname(__file__)) | |
def is_downloadable(url):
    """Return whether *url* appears to point at a downloadable resource.

    Sends a HEAD request (following redirects) and inspects the
    ``Content-Type`` response header. A content type containing ``text``
    or ``html`` is treated as a page rather than a file, and yields False.

    A response that carries no ``Content-Type`` header is treated as
    downloadable, since nothing marks it as text or HTML.
    """
    response = requests.head(url, allow_redirects=True)
    # The header may be absent; fall back to '' so .lower() never sees None.
    content_type = (response.headers.get('content-type') or '').lower()
    return 'text' not in content_type and 'html' not in content_type
def download_file(url):
    """Stream the resource at *url* into the download folder.

    The file name is derived from the URL with ``url2filename`` and the
    file is written to ``<basedir>/<DOWNLOAD_FOLDER>/<filename>``.

    Returns the file name (not the full path).
    Raises whatever ``raise_for_status()`` raises for an error response.
    """
    filename = url2filename(url)
    target_dir = os.path.join(basedir, DOWNLOAD_FOLDER)
    # Create the folder on first use; open() would otherwise fail with
    # FileNotFoundError when the directory does not exist yet.
    os.makedirs(target_dir, exist_ok=True)
    # stream=True avoids loading the whole body into memory at once.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(os.path.join(target_dir, filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return filename
def open_url(filename: str) -> List[str]:
    """Read the UTF-8 text file *filename* and return its lines.

    Lines are returned exactly as ``readlines()`` yields them, i.e. with
    their trailing newline characters still attached.
    """
    # Read-only mode: the file is never written, and 'r+' would fail on
    # files the user is not allowed to modify.
    with open(filename, 'r', encoding='UTF-8') as input_file:
        return input_file.readlines()
def get_download_url_list(url_list:List[str])->List[str]:
    """Resolve page URLs to direct download URLs through the parse API.

    Each entry of *url_list* is stripped of surrounding whitespace and sent
    as the ``url`` query parameter to
    ``http://v.ranks.xin/video-parse.php``. When the JSON reply has
    ``code == 0`` (presumably the API's success code -- confirm against the
    API), the ``url`` of the first item in ``data`` is collected.

    Blank entries (e.g. empty lines from the list file) are skipped without
    contacting the API. Entries whose reply has a non-zero code are dropped.

    Returns the collected download URLs in input order.
    """
    download_url_list = []
    for url in url_list:
        url = url.strip()
        if not url:
            # Nothing to resolve; avoid a pointless request with an empty url.
            continue
        api_response = requests.get(
            'http://v.ranks.xin/video-parse.php',
            params={'url': url},
        )
        api_return = api_response.json()
        if api_return["code"] == 0:
            download_url_list.append(api_return['data'][0]['url'])
    return download_url_list
def url2filename(url:str)->str:
    """Extract the file name from the last path segment of *url*.

    The match starts right after a ``/``, stops before any ``?`` or ``#``,
    and must not be followed by another ``/`` anywhere in the rest of the
    string, so a query string or fragment is not part of the result.

    Raises ValueError when *url* has no such segment (for example when it
    is empty or ends with ``/``).
    """
    # Search the parameter itself, not a module-level variable.
    match = re.search(r'(?<=\/)[^\/\?#]+(?=[^\/]*$)', url)
    if match is None:
        raise ValueError("cannot derive a file name from URL: %r" % (url,))
    # re.search() returns a Match object; group(0) is the matched text.
    return match.group(0)
if __name__=="__main__":
    # Read the page URLs listed next to this script, resolve them to direct
    # download URLs, then fetch every one that looks like a real file.
    source_path = os.path.join(basedir, URL_LIST_FILENAME)
    page_urls = open_url(source_path)
    for download_url in get_download_url_list(page_urls):
        if not is_downloadable(download_url):
            continue
        # Work out the file name first so it can be reported after the download.
        filename = url2filename(download_url)
        download_file(download_url)
        print(filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment