Skip to content

Instantly share code, notes, and snippets.

@u1735067
Last active March 27, 2021 22:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save u1735067/0de5f449c7fd418480d71282b6fb55cd to your computer and use it in GitHub Desktop.
Save u1735067/0de5f449c7fd418480d71282b6fb55cd to your computer and use it in GitHub Desktop.
United We Stream audio downloader, with corrected metadata & cover art (quick-and-dirty script)
#!python3
import sys, os, io, re, subprocess, threading, shutil, json
def youtubedl_fetch(url):
    """Download *url* with the bundled youtube-dl and return its metadata.

    The child's stderr is echoed to our stdout line by line while it runs.
    Its stdout is collected in the background, printed once the process has
    finished, and its first line is parsed as JSON (the command line passes
    ``--print-json``).

    A non-zero exit status is reported but does not abort: the JSON is
    still parsed if it is there.

    Args:
        url: Address handed to youtube-dl as the last argument.

    Returns:
        The object decoded from the first line of youtube-dl's stdout.

    Raises:
        SystemExit: With code 2 when that first line is not valid JSON.
        OSError: When the youtube-dl executable cannot be started.
        KeyboardInterrupt: Re-raised after asking the child to terminate.
    """
    # Like check_output, but keep stderr
    # https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L194
    cmdline = [
        './youtube-dl-2020.06.16.1',
        '--ffmpeg-location', 'ffmpeg-4.3-amd64-static/',
        '--prefer-free-formats',
        '--merge-output-format', 'mkv',
        '-f', '(bestvideo[ext=webm]/bestvideo)+(bestaudio[ext=webm]/bestaudio)/best',
        '--write-info-json',
        '--write-thumbnail',
        '--print-json',
        '--verbose',
        url
    ]
    stdout_buffer = io.BytesIO()
    print('> {}'.format(' '.join(cmdline)))

    # Bound before the try block so the interrupt handler never hits an
    # unbound name when Ctrl-C arrives before Popen() has returned.
    process = None
    try:
        process = subprocess.Popen(
            cmdline,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=os.getcwd(),
            env=os.environ.copy(),
            close_fds=True,
        )
        # Drain stdout in a helper thread so neither pipe can fill up and
        # stall the child while we are busy relaying the other one.
        # https://stackoverflow.com/questions/42148113/alternative-to-subprocess-popen-communicate-for-streaming
        stdout_thread = threading.Thread(
            target=shutil.copyfileobj,
            args=(process.stdout, stdout_buffer),
        )
        stdout_thread.start()

        for line in iter(process.stderr.readline, b''):
            sys.stdout.write(line.decode('utf-8', errors='replace'))
            sys.stdout.flush()

        retcode = process.wait()
        stdout_thread.join()

        # Both streams are fully drained here; release their descriptors.
        process.stdout.close()
        process.stderr.close()
    except KeyboardInterrupt:
        if process is not None:
            process.terminate()
        raise

    if retcode:  # Problem
        print('>> Error executing subprocess {}, rc={}'.format(cmdline, retcode))

    # Same lenient decoding as for stderr: a stray byte must not crash the
    # run after the download itself has completed.
    stdout = stdout_buffer.getvalue().decode('utf-8', errors='replace')
    print(stdout)

    try:
        return json.loads(stdout.split('\n')[0])
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print('>> Error parsing JSON response: {}'.format(e))
        raise SystemExit(2)
def execute_command(cmd, args=None):
    """Run *cmd* with *args*, echoing its output while it runs.

    The child's stderr is merged into its stdout, and every line is written
    to our stdout as soon as it is read. A non-zero exit status is reported
    with a message; no exception is raised for it.

    Args:
        cmd: Executable to launch.
        args: Optional sequence of command-line arguments. ``None`` (the
            default) means no arguments.

    Returns:
        The exit status of the child process (0 on success).

    Raises:
        OSError: When the executable cannot be started.
        KeyboardInterrupt: Re-raised after asking the child to terminate.
    """
    # Like check_output, but keep stderr
    # https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L194
    # A fresh list per call: no shared mutable default, and the caller's
    # sequence is never aliased.
    cmdline = [cmd] + list(args or [])
    print('> {}'.format(' '.join(cmdline)))

    # Bound before the try block so the interrupt handler never hits an
    # unbound name when Ctrl-C arrives before Popen() has returned.
    process = None
    try:
        process = subprocess.Popen(
            cmdline,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=os.getcwd(),
            env=os.environ.copy(),
            close_fds=True,
        )
        for line in iter(process.stdout.readline, b''):
            sys.stdout.write(line.decode('utf-8', errors='replace'))
            sys.stdout.flush()

        retcode = process.wait()
        # The stream is fully drained here; release its descriptor.
        process.stdout.close()
    except KeyboardInterrupt:
        if process is not None:
            process.terminate()
        raise

    if retcode:  # Problem
        print('>> Error executing subprocess {}, rc={}'.format(cmdline, retcode))
    return retcode
# Tags per collection, keyed by the first dash-separated field of a video's
# display_id. 'name' becomes the title prefix and the COLLECTION tag;
# 'location' (may be empty) becomes the RECORDING_LOCATION tag.
# Each key appears exactly once: a repeated key in a dict literal silently
# overrides the earlier entry.
collection_map = {
    # Guests starting at 50 (those sessions are titled without a number)
    '096844': {'name': 'United We Stream', 'location': 'DE'},
    '097457': {'name': 'United We Stream', 'location': 'DE'},
    '098237': {'name': 'United We Stream', 'location': 'DE'},
    '096905': {'name': 'United We Stream Global', 'location': ''},
    '098344': {'name': 'United We Stream Festival', 'location': ''},
    '100605': {'name': 'United We Stream November Lockdown Edition', 'location': ''},
    '098001': {'name': 'United We Stream Paris', 'location': 'FR'},
}
# --- Script body -----------------------------------------------------------
# Usage: <script> URL_LIST
# URL_LIST is a text file with one URL per line; blank lines and lines
# starting with '#' are skipped. Every URL is downloaded with
# youtubedl_fetch(), then passed through ffmpeg (stream copy) to produce a
# tagged .mka file with the thumbnail and the youtube-dl info JSON attached.
if len(sys.argv) < 2:
    raise SystemExit('Usage: {} URL_LIST'.format(sys.argv[0]))

with open(sys.argv[1], 'r') as in_list:
    stripped_lines = (line.strip() for line in in_list)
    urls = [line for line in stripped_lines if line and not line.startswith('#')]

print('{} URLs to fetch'.format(len(urls)))

for i, url in enumerate(urls, start=1):
    print('--- ({}/{}) {}'.format(i, len(urls), url))
    meta_json = youtubedl_fetch(url)

    # display_id must consist of exactly three dash-separated fields:
    # collection id, position in the collection, and a trailing field that
    # is not used here.
    collection_id, position, _ = meta_json['display_id'].split('-')
    collection = collection_map[collection_id]
    collection_name = collection['name']
    collection_location = collection['location']

    # Collapse runs of whitespace in the published title.
    full_title = re.sub(r'\s{2,}', ' ', meta_json['fulltitle']).strip()

    # Try to extract from name first, else use position
    track_from_name = re.search(r'#\s*(?P<track>[0-9]+)', full_title)
    track_id = int(
        track_from_name.group('track') if track_from_name is not None else position
    )

    # Drop the leading "United We Stream ... <separator> " part of the title.
    session_name = re.sub(r'United We Stream.+?(:|-|à|en|@)\s+', '', full_title)

    # Entries of collection 096844 numbered 50 and up get no "#<n>" in
    # their title.
    if collection_id == '096844' and track_id >= 50:
        title = '{} - {}'.format(collection_name, session_name)
    else:
        title = '{} #{} - {}'.format(collection_name, track_id, session_name)

    print('Input {}: {}'.format('display_id', meta_json['display_id']))
    print('Input {}: {}'.format('fulltitle', meta_json['fulltitle']))
    print('Output {}: {}'.format('collection_name', collection_name))
    print('Output {}: {}'.format('track_id', track_id))
    print('Output {}: {}'.format('session_name', session_name))
    print('Output {}: {}'.format('title', title))

    # upload_date is reformatted from YYYYMMDD to YYYY-MM-DD.
    date = re.sub(r'(.{4})(.{2})(.{2})', r'\1-\2-\3', meta_json['upload_date'])

    in_filename = meta_json['_filename']
    # The thumbnail and the info JSON are expected next to the media file,
    # sharing its name minus the extension. Deriving them with splitext
    # works for any container, whereas replacing '.mp4' left the name
    # untouched for other extensions and so pointed at the media file itself.
    in_basename = os.path.splitext(in_filename)[0]
    thumbnail_filename = in_basename + '.jpg'
    info_filename = in_basename + '.info.json'
    out_filename = '{} [{}].mka'.format(title, meta_json['display_id'])

    ffmpeg_args = [
        '-loglevel', 'verbose',
        '-i', in_filename,
        '-c', 'copy',
        '-movflags', 'use_metadata_tags',
        '-map_metadata', '0',
        '-map_metadata:s:v', '0:s:v',
        '-map_metadata:s:a', '0:s:a',
        '-metadata', 'title={}'.format(title),
        '-metadata', 'track={}'.format(track_id),
        '-metadata', 'COLLECTION={}'.format(collection_name),
        '-metadata', 'DATE_RECORDED={}'.format(date),
    ]
    if collection_location:
        ffmpeg_args += ['-metadata', 'RECORDING_LOCATION={}'.format(collection_location)]
    ffmpeg_args += [
        '-metadata', 'DISTRIBUTED_BY={}'.format('Arte Concert'),
        '-metadata', 'CATALOG_NUMBER={}'.format(meta_json['display_id']),
        '-metadata', 'SESSION={}'.format(session_name),
    ]

    # Missing, None and whitespace-only descriptions are all skipped.
    description = (meta_json.get('description') or '').strip()
    if description:
        ffmpeg_args += ['-metadata', 'DESCRIPTION={}'.format(description)]

    ffmpeg_args += [
        '-attach', thumbnail_filename,
        '-metadata:s:t:0', 'filename=cover_land.jpg',
        '-metadata:s:t:0', 'mimetype=image/jpeg',
        '-metadata:s:t:0', 'title=Thumbnail',
        '-attach', info_filename,
        '-metadata:s:t:1', 'filename=youtube-dl.info.json',
        '-metadata:s:t:1', 'mimetype=application/json',
        # No shell is involved, so quoting here would end up inside the tag.
        '-metadata:s:t:1', 'title=Youtube-dl info file',
        out_filename
    ]

    retcode = execute_command(
        'ffmpeg-4.3-amd64-static/ffmpeg',
        ffmpeg_args
    )

    # Only delete the downloads once the remux has succeeded, so a failed
    # run loses nothing and can be retried. execute_command may return None
    # rather than an exit status, hence the extra check that the output
    # file actually exists.
    if retcode or not os.path.isfile(out_filename):
        print('>> Remux failed, keeping source files of {}'.format(in_filename))
        continue

    for leftover in (in_filename, thumbnail_filename, info_filename):
        os.remove(leftover)
# Inert notes (a bare string literal, never executed): jq filters over the
# same `.fulltitle` field that the regular expressions above process; they
# appear to be earlier drafts of the title parsing.
# The literal is raw so the filters are kept verbatim and `\(` is not read
# as an (invalid) string escape, which newer Python versions warn about.
r'''
track_name =
./jq -r '.fulltitle | sub("\\s{2,}"; " ")'
./jq -r '.fulltitle'
./jq -r '.fulltitle | sub("United We Stream Festival\\s+((à|-|en|@)\\s+)?(?<session>.*)"; "\(.session)") | sub("\\s{2,}"; " ")')
session_name =
./jq -r '.fulltitle | sub(".*#\\s*[0-9]+\\s*(:|-)\\s*(?<session>.*)"; "\(.session)")
./jq -r '.fulltitle | sub("United We Stream\\s+((à|-|en)\\s+)?(?<session>.*)"; "\(.session)")
./jq -r '.fulltitle'
./jq -r '.fulltitle | sub("United We Stream\\s+((à|-)\\s+)?(?<session>.*)"; "\(.session)")
./jq -r '.fulltitle | sub("United We Stream Festival\\s+((à|-|en|@)\\s+)?(?<session>.*)"; "\(.session)")
./jq -r '.fulltitle | sub("United We Stream\\s+((à|-|en)\\s+)?(?<session>.*)"; "\(.session)")'
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment