Skip to content

Instantly share code, notes, and snippets.

@crosstyan
Created June 21, 2020 06:26
Show Gist options
  • Save crosstyan/a2a0163b33c539c98497629d5fb1d059 to your computer and use it in GitHub Desktop.
Save crosstyan/a2a0163b33c539c98497629d5fb1d059 to your computer and use it in GitHub Desktop.
Use the "http://v.ranks.xin/" API to resolve video-page URLs and download the videos.
from typing import List
import requests
#import json
import re
import os
import asyncio
from contextlib import closing
# Folder (relative to this script) where downloaded videos are written.
DOWNLOAD_FOLDER="download_videos"
# Text file next to this script listing one page URL per line.
URL_LIST_FILENAME="url_list.txt"
# Absolute directory containing this script; anchors all relative paths.
basedir = os.path.abspath(os.path.dirname(__file__))
def is_downloadable(url):
    """Return True if *url* appears to point at a downloadable file.

    Issues a HEAD request (following redirects) and inspects the
    Content-Type header: anything served as text or HTML is assumed to
    be a web page rather than a file.

    :param url: URL to probe.
    :return: True when the Content-Type is neither text nor HTML.
    """
    head_response = requests.head(url, allow_redirects=True)
    # Default to '' — some servers omit Content-Type entirely, and the
    # original code crashed with AttributeError on None here.
    content_type = head_response.headers.get('content-type', '')
    content_type = content_type.lower()
    if 'text' in content_type:
        return False
    if 'html' in content_type:
        return False
    return True
def download_file(url):
    """Stream *url* into DOWNLOAD_FOLDER and return the local filename.

    The filename is derived from the URL's last path segment. The target
    folder is created on demand (the original crashed with
    FileNotFoundError when it did not exist).

    :param url: direct URL of the file to download.
    :return: the filename the content was saved under.
    :raises requests.HTTPError: if the server responds with an error status.
    """
    filename = url2filename(url)
    target_dir = os.path.join(basedir, DOWNLOAD_FOLDER)
    os.makedirs(target_dir, exist_ok=True)  # ensure download folder exists
    # NOTE the stream=True parameter: avoids loading the whole body into memory.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(os.path.join(target_dir, filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                # If you have a chunk-encoded response, set chunk_size=None
                # and guard with `if chunk:` before writing.
                f.write(chunk)
    return filename
def open_url(filename):
    """Read *filename* and return its lines (newlines preserved).

    :param filename: path of a UTF-8 text file, one URL per line.
    :return: list of raw lines, each still ending in its newline.
    """
    # Mode 'r' — the original used 'r+' (read-write) although the file is
    # only read, which fails on read-only files for no benefit.
    with open(filename, 'r', encoding='UTF-8') as input_file:
        return input_file.readlines()
def get_download_url_list(url_list: List[str]) -> List[str]:
    """Resolve each page URL through the v.ranks.xin parse API.

    Queries the API once per (stripped) URL and collects the first
    direct video URL for every entry the API parsed successfully
    (``code == 0``); unparsable entries are silently skipped.

    :param url_list: raw page URLs, possibly with surrounding whitespace.
    :return: direct video download URLs, in input order.
    """
    resolved = []
    for raw_url in url_list:
        page_url = raw_url.strip()
        response = requests.get('http://v.ranks.xin/video-parse.php',
                                params={'url': page_url})
        payload = response.json()
        if payload["code"] == 0:
            resolved.append(payload['data'][0]['url'])
    return resolved
def url2filename(url: str) -> str:
    """Extract the filename (last path segment) from *url*.

    Query strings and fragments are excluded, e.g.
    ``http://h/a/b.mp4?t=1`` -> ``b.mp4``.

    Bug fix: the original searched the global ``download_url`` instead of
    the ``url`` parameter, so it only worked by accident inside the main
    loop and raised NameError anywhere else.

    :param url: URL containing at least one '/' before the filename.
    :return: the matched filename.
    :raises AttributeError: if the URL has no extractable filename
        (re.search returns None and .group fails).
    """
    # Lookbehind anchors on the last '/', the body stops at '?' or '#'.
    return re.search(r'(?<=\/)[^\/\?#]+(?=[^\/]*$)', url).group(0)
if __name__ == "__main__":
    # Read page URLs from url_list.txt next to this script, resolve them
    # through the parse API, then download every resolvable video and
    # print the saved filename.
    url_list_path = os.path.join(basedir, URL_LIST_FILENAME)
    page_urls = open_url(url_list_path)
    for download_url in get_download_url_list(page_urls):
        if not is_downloadable(download_url):
            continue
        filename = url2filename(download_url)
        download_file(download_url)
        print(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment