Skip to content

Instantly share code, notes, and snippets.

@VieVie31
Created July 7, 2019 20:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VieVie31/22be1ba7629c5d9f5d5e2f0b7d87188f to your computer and use it in GitHub Desktop.
Save VieVie31/22be1ba7629c5d9f5d5e2f0b7d87188f to your computer and use it in GitHub Desktop.
"""
Script to download a video from the ARTE recordings of the Paris Philharmonie.
Example:
python3 download.py "https://www.arte.tv/fr/videos/087078-007-A/kazuki-yamada-dirige-berlioz-et-jarrell/"
python3 download.py "https://www.arte.tv/fr/videos/087078-007-A/kazuki-yamada-dirige-berlioz-et-jarrell/" my_output_name.mp4
"""
import os
import sys
import json
import argparse
import requests
from time import time
# HTTP headers carrying a desktop-browser User-Agent string; passed along
# with the request that fetches the video metadata from the API.
HEADERS = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}
def download(url, filename):
    """Stream the resource at ``url`` into ``filename`` with a progress bar.

    The body is written chunk by chunk so it never has to fit in memory.
    When the server announces a positive ``Content-Length`` a 50-character
    progress bar is redrawn on stdout after every chunk.

    Args:
        url: Address fetched with an HTTP GET.
        filename: Path of the local file to create (overwritten if present).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # source : https://sumit-ghosh.com/articles/python-download-progress-bar/
    bar_width = 50
    one_mib = 1024 * 1024

    # The response is a context manager: leaving the block releases the
    # streamed connection even if writing the file fails half-way.
    with requests.get(url, stream=True) as response:
        # Fail before touching the disk so an error page is never saved
        # under the name of the video.
        response.raise_for_status()

        # A missing header and an explicit "0" both mean "no usable total".
        total = int(response.headers.get('content-length') or 0)

        with open(filename, 'wb') as f:
            if total <= 0:
                # No ratio can be computed: just stream the body to disk.
                for data in response.iter_content(chunk_size=one_mib):
                    f.write(data)
            else:
                downloaded = 0
                chunk_size = max(total // 1000, one_mib)
                for data in response.iter_content(chunk_size=chunk_size):
                    downloaded += len(data)
                    f.write(data)
                    # Clamp: the decoded body may be larger than the announced
                    # length, which would otherwise overflow the bar.
                    done = min(bar_width, bar_width * downloaded // total)
                    sys.stdout.write(
                        '\r[{}{}]'.format('█' * done, '.' * (bar_width - done))
                    )
                    sys.stdout.flush()
    sys.stdout.write('\n')
def main(argv):
    """Download the highest-bitrate mp4 of an ARTE video page.

    Args:
        argv: Command-line arguments laid out like ``sys.argv``: the script
            name, the video page url and, optionally, the output path.

    Returns:
        The process exit status: 0 on success, 1 when the interpreter is
        older than Python 3.7, 2 when the arguments are missing or no video
        id can be extracted from the url.

    Raises:
        Exception: If the API request fails or no mp4 video is listed.
    """
    # Tuple comparison; the previous "major < 3 and minor < 7" test could
    # never be true for any Python 3.x release.
    if sys.version_info < (3, 7):
        print("This script requires python 3.7 at least !")
        return 1

    if len(argv) < 2:
        script = argv[0] if argv else "download.py"
        print(f"Usage : python3 {script} video_page_url [out_path].")
        return 2

    url = argv[1]
    out_name = argv[2] if len(argv) > 2 else ""

    # Get the API info url code from the video page url
    _, marker, tail = url.partition("/videos/")
    video_id = tail.split('/')[0]
    if not marker or not video_id:
        print(f"Could not find a video id in url : {url}")
        return 2
    video_infos_url = f"https://api.arte.tv/api/player/v1/config/fr/{video_id}?autostart=1&lifeCycle=1&lang=fr_FR&autostart=1&mute=0"

    # Start a session and download videos info via the API
    with requests.Session() as session:
        r = session.get(video_infos_url, headers=HEADERS)
    if not r.ok:
        raise Exception(f"Failed to find infos... :'( error code : {r.status_code}")

    # Parse API result
    api_infos = json.loads(r.content.decode())
    player = api_infos['videoJsonPlayer']

    # Find the highest bitrate mp4 video url
    video_candidates = player['VSR']
    mp4_videos = [
        video for video in video_candidates.values()
        if video.get('mediaType') == 'mp4'
    ]
    if not mp4_videos:
        raise Exception("No mp4 url found... :'(")
    video_url = max(mp4_videos, key=lambda video: video['bitrate'])['url']

    # Attribute an output file name
    if not out_name:
        valid_chars = set('-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
        # Prefer the 'VTI' field, then 'VID'; skip a field whose sanitized
        # form is blank so the file is never named just ".mp4".
        for field in ('VTI', 'VID'):
            cleaned = ''.join(c for c in (player.get(field) or '') if c in valid_chars)
            if cleaned.strip():
                out_name = cleaned
                break
        else:
            # Last resort: a timestamp-based name.
            out_name = f"{time()}"
        out_name = out_name + '.mp4'

    # Download and save content
    download(video_url, out_name)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment