Skip to content

Instantly share code, notes, and snippets.

@hkva
Last active May 21, 2022 03:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hkva/d9313a119b1c858ab93765d932bddf3f to your computer and use it in GitHub Desktop.
Save hkva/d9313a119b1c858ab93765d932bddf3f to your computer and use it in GitHub Desktop.
Python script to rip music from YouTube. Includes MP3 metadata, normalizes loudness, and embeds album art
#!/usr/bin/env python3
import json
import math
import os
import sys
import PIL.Image as img
import pyloudnorm as pl
import soundfile as sf
import yt_dlp as yt
# What we want to do:
# 1. Accept a video or playlist URL DONE
# 2. Extract audio DONE
# 3. Normalize audio DONE
# 4. Embed mp3 metadata DONE
# 5. Embed album art DONE
# 6. Save as <ARTIST> - <TITLE>.mp3 DONE
# Audio normalizer
class NormalizerPP(yt.postprocessor.PostProcessor):
    """Post-processor that rewrites the audio file at a target loudness.

    The file is read with soundfile, its integrated loudness is measured
    with pyloudnorm, and the gain-corrected samples are written back to the
    same path.
    """

    def __init__(self, loudness):
        """Remember the target loudness handed to pyloudnorm's normalizer."""
        super().__init__()
        self._loudness = loudness

    def run(self, info):
        """Normalize the file named by ``info['filepath']`` in place.

        Returns ``([], info)``: no files are scheduled for deletion and the
        info dict is passed on unchanged.
        """
        fp = os.path.join(info['__finaldir'], info['filepath'])
        data, rate = sf.read(fp)
        meter = pl.Meter(rate)
        loudness = meter.integrated_loudness(data)
        # A silent track measures as -inf; the required gain would then be
        # infinite and the file would be overwritten with inf/NaN samples.
        if not math.isfinite(loudness):
            self.to_screen(
                'Measured loudness is not finite (silent audio?), '
                'skipping normalization')
            return [], info
        self.to_screen(
            f'Normalizing loudness from {loudness} to {self._loudness}')
        corrected = pl.normalize.loudness(data, loudness, self._loudness)
        sf.write(fp, corrected, rate)
        return [], info
# Create square album art
class AlbumArtPP(yt.postprocessor.PostProcessor):
    """Post-processor that center-crops the downloaded thumbnail to a square."""

    @staticmethod
    def _square_box(width, height):
        """Return the (left, upper, right, lower) box of the centered square.

        The square's side is the shorter image dimension, so the box always
        lies inside the image for both landscape and portrait inputs.
        """
        side = min(width, height)
        left = (width - side) // 2
        upper = (height - side) // 2
        return left, upper, left + side, upper + side

    def run(self, info):
        """Crop the first thumbnail that has a ``filepath`` and save it in place.

        Returns ``([], info)``; when no thumbnail entry carries a
        ``filepath`` the step is skipped with a message.
        """
        # Find thumbnail path
        thumb_name = next(
            (t['filepath'] for t in info.get('thumbnails') or []
             if 'filepath' in t),
            None)
        if thumb_name is None:
            self.to_screen('No thumbnail could be found')
            return [], info
        # Open thumbnail and center crop; the context manager releases the
        # file handle before the same path is written again.
        fp = os.path.join(info['__finaldir'], thumb_name)
        with img.open(fp) as im:
            cropped = im.crop(self._square_box(im.width, im.height))
        self.to_screen(f'Resized to {cropped.width}x{cropped.height}')
        # Save
        cropped.save(fp)
        return [], info
class RenamePP(yt.postprocessor.PostProcessor):
    """Post-processor that renames the output to ``<artist> - <track><ext>``."""

    def run(self, info):
        """Rename the file named by ``info['filepath']`` inside ``__finaldir``.

        The step is skipped (with a message) when track/artist metadata is
        missing or empty, or when the destination file already exists.
        On success ``info['filepath']`` is updated to the new name.
        Always returns ``([], info)``.
        """
        track = info.get('track')
        artist = info.get('artist')
        # Treat None/empty values like missing keys so the file is never
        # renamed to something like "None - None.mp3".
        if not track or not artist:
            self.to_screen('No metadata')
            return [], info

        ext = os.path.splitext(info['filepath'])[1]
        newpath = f'{artist} - {track}{ext}'
        # Remove path delimiters from name
        newpath = newpath.replace('/', '-').replace('\\', '-')
        # Remove windows-unfriendly characters from name
        newpath = newpath.replace('?', '')
        self.to_screen(f'Renaming to {newpath}')

        # Check the same location the rename writes to, not the bare name
        # relative to the current working directory.
        source = os.path.join(info['__finaldir'], info['filepath'])
        target = os.path.join(info['__finaldir'], newpath)
        if os.path.exists(target):
            self.to_screen('File already exists, skipping')
            return [], info

        os.rename(source, target)
        info['filepath'] = newpath
        return [], info
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the video or
    # playlist URL to download.
    if len(sys.argv) != 2:
        print(f'Usage: {sys.argv[0]} <URL>')
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    ytdl_opts = {
        'format': 'bestaudio',
        # Real boolean rather than the string 'True'
        'writethumbnail': True,
    }
    with yt.YoutubeDL(ytdl_opts) as ytdl:
        # Post-processors run in the order they are added:
        # decode to WAV -> normalize -> encode MP3 -> tag -> square art ->
        # embed art -> rename.
        ytdl.add_post_processor(yt.postprocessor.FFmpegExtractAudioPP(
            ytdl, preferredcodec='wav'
        ))
        ytdl.add_post_processor(NormalizerPP(-15))
        ytdl.add_post_processor(yt.postprocessor.FFmpegExtractAudioPP(
            ytdl, preferredcodec='mp3'
        ))
        ytdl.add_post_processor(yt.postprocessor.FFmpegMetadataPP(
            ytdl
        ))
        ytdl.add_post_processor(AlbumArtPP())
        ytdl.add_post_processor(yt.postprocessor.EmbedThumbnailPP(
            ytdl
        ))
        ytdl.add_post_processor(RenamePP())
        ytdl.download([sys.argv[1]])
        # Debugging aid: dump the extracted info dict instead of downloading.
        # info = ytdl.extract_info(sys.argv[1])
        # print(json.dumps(ytdl.sanitize_info(info), indent=4, sort_keys=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment