Last active
March 27, 2021 22:05
-
-
Save u1735067/0de5f449c7fd418480d71282b6fb55cd to your computer and use it in GitHub Desktop.
United We Stream audio downloader, with corrected metadata and cover art (quick-and-dirty script)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
import sys, os, io, re, subprocess, threading, shutil, json | |
def youtubedl_fetch(url, executable='./youtube-dl-2020.06.16.1',
                    ffmpeg_location='ffmpeg-4.3-amd64-static/'):
    """Download *url* with youtube-dl and return the metadata it prints.

    youtube-dl is started with ``--print-json``; its stderr is echoed line by
    line to our stdout while it runs, and its stdout is collected in memory.
    Once the process has exited, the collected stdout is printed and its
    first line is parsed as JSON.

    Args:
        url: The page URL handed to youtube-dl.
        executable: Path of the youtube-dl program to run.
        ffmpeg_location: Value passed to youtube-dl's ``--ffmpeg-location``.

    Returns:
        The object decoded from the first line of youtube-dl's stdout.

    Raises:
        SystemExit: With code 2 when that first line is not valid JSON.
        KeyboardInterrupt: Re-raised after the child has been terminated.
    """
    cmdline = [
        executable,
        '--ffmpeg-location', ffmpeg_location,
        '--prefer-free-formats',
        '--merge-output-format', 'mkv',
        '-f', '(bestvideo[ext=webm]/bestvideo)+(bestaudio[ext=webm]/bestaudio)/best',
        '--write-info-json',
        '--write-thumbnail',
        '--print-json',
        '--verbose',
        url,
    ]
    stdout_buffer = io.BytesIO()
    print('> {}'.format(' '.join(cmdline)))

    # Like check_output, but keep stderr visible while the process runs.
    # Using Popen as a context manager closes both pipes and reaps the child
    # on every exit path, including Ctrl-C.
    with subprocess.Popen(
        cmdline,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=os.getcwd(),
        env=os.environ.copy(),
        close_fds=True,
    ) as process:
        # Drain stdout on a separate thread so that a full stdout pipe cannot
        # block the child while this thread is busy reading stderr.
        stdout_thread = threading.Thread(
            target=shutil.copyfileobj,
            args=(process.stdout, stdout_buffer),
        )
        stdout_thread.start()
        try:
            for line in iter(process.stderr.readline, b''):
                sys.stdout.write(line.decode('utf-8', errors='replace'))
                sys.stdout.flush()
            retcode = process.wait()
        except KeyboardInterrupt:
            process.terminate()
            raise
        finally:
            # The reader must be finished before the pipe gets closed.
            stdout_thread.join()

    if retcode:  # Problem: report it, but still try to use whatever was printed
        print('>> Error executing subprocess {}, rc={}'.format(cmdline, retcode))

    # Same lenient decoding as for stderr: one bad byte must not abort the run.
    stdout = stdout_buffer.getvalue().decode('utf-8', errors='replace')
    print(stdout)
    try:
        return json.loads(stdout.split('\n')[0])
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print('>> Error parsing JSON response: {}'.format(e))
        raise SystemExit(2)
def execute_command(cmd, args=None):
    """Run ``cmd`` with ``args``, echoing its combined output as it arrives.

    The child's stderr is merged into its stdout, and every line is decoded
    (undecodable bytes are replaced) and written to our stdout immediately.
    A non-zero exit status is reported with a printed message, not an
    exception.

    Args:
        cmd: Program to execute.
        args: Optional sequence of command-line arguments (list or tuple).

    Returns:
        The exit status of the process.

    Raises:
        KeyboardInterrupt: Re-raised after the child has been terminated.
    """
    # Like check_output, but keep stderr
    cmdline = [cmd] + list(args or [])
    print('> {}'.format(' '.join(cmdline)))

    # The context manager closes the pipe and reaps the child on every exit
    # path, including Ctrl-C.
    with subprocess.Popen(
        cmdline,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        cwd=os.getcwd(),
        env=os.environ.copy(),
        close_fds=True,
    ) as process:
        try:
            for line in iter(process.stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8', errors='replace'))
                sys.stdout.flush()
            retcode = process.wait()
        except KeyboardInterrupt:
            process.terminate()
            raise

    if retcode:  # Problem
        print('>> Error executing subprocess {}, rc={}'.format(cmdline, retcode))
    return retcode
# Collection id -> display name and recording location. The id is the first
# dash-separated field of a video's display_id; an empty location means that
# no RECORDING_LOCATION tag is written for that collection.
collection_map = {
    # Guests starting at 50 (those are titled without a track number)
    '096844': {'name': 'United We Stream', 'location': 'DE'},
    '097457': {'name': 'United We Stream', 'location': 'DE'},
    '098237': {'name': 'United We Stream', 'location': 'DE'},
    '096905': {'name': 'United We Stream Global', 'location': ''},
    '098344': {'name': 'United We Stream Festival', 'location': ''},
    '100605': {'name': 'United We Stream November Lockdown Edition', 'location': ''},
    '098001': {'name': 'United We Stream Paris', 'location': 'FR'},
}
# --- Script body -----------------------------------------------------------
# Reads a list of URLs from the file named by the first command-line argument
# (blank lines and lines starting with '#' are ignored), downloads each one,
# and remuxes it with ffmpeg into a tagged .mka file that carries the
# thumbnail and the youtube-dl info file as attachments.

if len(sys.argv) < 2:
    print('Usage: {} URL_LIST_FILE'.format(sys.argv[0]))
    raise SystemExit(1)

with open(sys.argv[1], 'r') as in_list:
    stripped_lines = [line.strip() for line in in_list]
urls = [url for url in stripped_lines if url and not url.startswith('#')]

print('{} URLs to fetch'.format(len(urls)))

for i, url in enumerate(urls, start=1):
    print('--- ({}/{}) {}'.format(i, len(urls), url))
    meta_json = youtubedl_fetch(url)

    # display_id is expected to have exactly three dash-separated fields:
    # collection id, position within the collection, and a trailing field
    # that is not used here.
    collection_id, position, _ = meta_json['display_id'].split('-')
    collection_name = collection_map[collection_id]['name']
    collection_location = collection_map[collection_id]['location']

    # Collapse runs of whitespace in the published title
    full_title = re.sub(r'\s{2,}', ' ', meta_json['fulltitle']).strip()

    # Try to extract the track number from the name first ("#12"), else use
    # the position taken from display_id
    track_from_name = re.search(r'#\s*(?P<track>[0-9]+)', full_title)
    track_id = int(track_from_name.group('track') if track_from_name is not None else position)

    # Drop the leading "United We Stream ... <separator> " part of the title
    session_name = re.sub(r'United We Stream.+?(:|-|à|en|@)\s+', '', full_title)

    # Collection 096844 entries numbered 50 and up get no track number in
    # their title (noted as guest sessions in collection_map)
    if collection_id == '096844' and track_id >= 50:
        title = '{} - {}'.format(collection_name, session_name)
    else:
        title = '{} #{} - {}'.format(collection_name, track_id, session_name)

    print('Input {}: {}'.format('display_id', meta_json['display_id']))
    print('Input {}: {}'.format('fulltitle', meta_json['fulltitle']))
    print('Output {}: {}'.format('collection_name', collection_name))
    print('Output {}: {}'.format('track_id', track_id))
    print('Output {}: {}'.format('session_name', session_name))
    print('Output {}: {}'.format('title', title))

    # YYYYMMDD -> YYYY-MM-DD
    date = re.sub(r'(.{4})(.{2})(.{2})', r'\1-\2-\3', meta_json['upload_date'])

    in_filename = meta_json['_filename']
    # The sidecar files share the media file's name minus its extension.
    # Splitting off the real extension (instead of replacing the text '.mp4')
    # also works when the download is not an .mp4 and cannot touch a '.mp4'
    # that happens to sit in the middle of the name.
    base_name = os.path.splitext(in_filename)[0]
    thumbnail_filename = base_name + '.jpg'
    info_filename = base_name + '.info.json'

    # A path separator inside the title would turn the output name into a
    # path below a directory that does not exist, so neutralise it.
    out_filename = '{} [{}].mka'.format(title, meta_json['display_id']).replace(os.sep, '_')

    ffmpeg_args = [
        '-loglevel', 'verbose',
        '-i', in_filename,
        '-c', 'copy',
        '-movflags', 'use_metadata_tags',
        '-map_metadata', '0',
        '-map_metadata:s:v', '0:s:v',
        '-map_metadata:s:a', '0:s:a',
        '-metadata', 'title={}'.format(title),
        '-metadata', 'track={}'.format(track_id),
        '-metadata', 'COLLECTION={}'.format(collection_name),
        '-metadata', 'DATE_RECORDED={}'.format(date),
    ]
    if collection_location:
        ffmpeg_args += ['-metadata', 'RECORDING_LOCATION={}'.format(collection_location)]
    ffmpeg_args += [
        '-metadata', 'DISTRIBUTED_BY={}'.format('Arte Concert'),
        '-metadata', 'CATALOG_NUMBER={}'.format(meta_json['display_id']),
        '-metadata', 'SESSION={}'.format(session_name),
    ]

    # The description is optional and may be missing, None or blank
    description = (meta_json.get('description') or '').strip()
    if description:
        ffmpeg_args += ['-metadata', 'DESCRIPTION={}'.format(description)]

    ffmpeg_args += [
        '-attach', thumbnail_filename,
        '-metadata:s:t:0', 'filename=cover_land.jpg',
        '-metadata:s:t:0', 'mimetype=image/jpeg',
        '-metadata:s:t:0', 'title=Thumbnail',
        '-attach', info_filename,
        '-metadata:s:t:1', 'filename=youtube-dl.info.json',
        '-metadata:s:t:1', 'mimetype=application/json',
        # No shell is involved, so quote characters would end up in the tag
        '-metadata:s:t:1', 'title=Youtube-dl info file',
        out_filename,
    ]

    ffmpeg_status = execute_command(
        'ffmpeg-4.3-amd64-static/ffmpeg',
        ffmpeg_args
    )

    # Only delete the downloaded files once the remux has demonstrably
    # worked: a non-zero status (when execute_command reports one) or a
    # missing/empty output file means the inputs are all that is left.
    output_ok = os.path.isfile(out_filename) and os.path.getsize(out_filename) > 0
    if ffmpeg_status or not output_ok:
        print('>> ffmpeg did not produce {}, keeping downloaded files'.format(out_filename))
        continue

    for leftover in (in_filename, thumbnail_filename, info_filename):
        os.remove(leftover)

# Scratch notes kept from the original script: jq filters over `.fulltitle`
# that look like earlier attempts at the title clean-up and session-name
# extraction done with `re` above. Reference material only, never executed.
#
# track_name =
# ./jq -r '.fulltitle | sub("\\s{2,}"; " ")'
# ./jq -r '.fulltitle'
# ./jq -r '.fulltitle | sub("United We Stream Festival\\s+((à|-|en|@)\\s+)?(?<session>.*)"; "\(.session)") | sub("\\s{2,}"; " ")')
# session_name =
# ./jq -r '.fulltitle | sub(".*#\\s*[0-9]+\\s*(:|-)\\s*(?<session>.*)"; "\(.session)")
# ./jq -r '.fulltitle | sub("United We Stream\\s+((à|-|en)\\s+)?(?<session>.*)"; "\(.session)")
# ./jq -r '.fulltitle'
# ./jq -r '.fulltitle | sub("United We Stream\\s+((à|-)\\s+)?(?<session>.*)"; "\(.session)")
# ./jq -r '.fulltitle | sub("United We Stream Festival\\s+((à|-|en|@)\\s+)?(?<session>.*)"; "\(.session)")
# ./jq -r '.fulltitle | sub("United We Stream\\s+((à|-|en)\\s+)?(?<session>.*)"; "\(.session)")'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment