Skip to content

Instantly share code, notes, and snippets.

@lvm
Last active August 19, 2020 22:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lvm/e8f89de2cb1b20f73e3ce37aea16d879 to your computer and use it in GitHub Desktop.
Save lvm/e8f89de2cb1b20f73e3ce37aea16d879 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import re
import eyed3
import shlex
import string
import argparse
import youtube_dl
import subprocess as sp
from pathlib import Path
from youtube_dl.utils import sanitize_filename
# User-Agent header sent with youtube-dl's HTTP requests (see save_mp3).
USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"

# Splits a title of the form "<band> <separator> <song>", where the separator
# is an em dash, a triangle or a hyphen surrounded by single spaces.
# `.` (with DOTALL via `(?s)`) is used instead of an ASCII-only character class
# so that names containing accents or non-Latin scripts are captured whole
# rather than being cut at the first non-ASCII character.
# The band group is greedy, so the LAST separator in the title is the split point.
BAND_ALBUM_RE = r"(?s)(?P<band>.+)( — | ▲ | - )(?P<song>.+)"
def ffmpeg(args: list) -> None:
    """Run ffmpeg quietly, overwriting any existing output files.

    Args:
        args: Extra ffmpeg arguments. Either a sequence of individual
            arguments (each is converted with ``str``) or, for backward
            compatibility, a single command-line string that is split with
            ``shlex.split``.
    """
    if isinstance(args, str):
        extra = shlex.split(args)
    else:
        # Interpolating a list into a command string would embed its Python
        # repr (brackets, quotes, commas), so build the argv directly.
        extra = [str(arg) for arg in args]
    sp.call(["ffmpeg", "-v", "quiet", "-y", *extra])
# Fragments that are noise in a video title rather than part of the name.
_ALBUM_NOISE = (
    r"\bfull\s+album\b",  # "full album" marker, probably from YouTube
    r"\b\d{4}\b",  # a standalone four-digit number, i.e. a year
    r"\[[^\]]*\]",  # anything inside square brackets
    r"\([^)]*\)",  # anything inside parentheses
    r"\bHD\b",  # standalone quality marker
)
_ALBUM_NOISE_RE = re.compile("|".join(_ALBUM_NOISE), re.IGNORECASE)


def sanitize_album(album: str) -> str:
    """Strip title noise (year, "full album", bracketed notes, "HD") from *album*.

    Matching is case-insensitive. Words and numbers are only removed when they
    stand alone, so "Birthday" keeps its "hd" and "12345" keeps its digits.
    Each bracketed group is removed on its own, leaving text between two
    groups intact. Runs of whitespace left behind are collapsed to one space.

    Args:
        album: Raw album/song text taken from a video title.

    Returns:
        The cleaned text with surrounding whitespace removed.
    """
    cleaned = _ALBUM_NOISE_RE.sub("", album)
    # Removing fragments leaves gaps such as "Live  and Loud"; normalise them.
    return re.sub(r"\s+", " ", cleaned).strip()
def guess_band_song(audio_title: str) -> dict:
    """Guess the band and song from a "<band> <separator> <song>" title.

    Args:
        audio_title: The full, unmodified title to analyse.

    Returns:
        A dict with the keys ``"band"`` and ``"song"``. When the title matches
        ``BAND_ALBUM_RE`` the band is whitespace-stripped and the song is
        passed through ``sanitize_album``. Otherwise the band is
        ``"Various artists"`` and the song is the title as given.
    """
    match = re.search(BAND_ALBUM_RE, audio_title)
    if not match:
        # No recognisable separator: keep the whole title as the song name.
        return {"band": "Various artists", "song": audio_title}

    guessed = match.groupdict()
    guessed["band"] = guessed["band"].strip()
    guessed["song"] = sanitize_album(guessed["song"])
    return guessed
def save_id3(title: str, audio_file: str) -> None:
    """Write artist, album artist and title ID3 tags guessed from *title*.

    Args:
        title: Video title, parsed with ``guess_band_song``.
        audio_file: Path of the audio file to tag.

    Raises:
        ValueError: If eyed3 does not return a loadable audio file for
            *audio_file*.
    """
    gbs = guess_band_song(title)
    audiofile = eyed3.load(audio_file)
    if audiofile is None:
        # eyed3.load returns None for files it does not recognise as audio.
        raise ValueError(f"Cannot load {audio_file!r} as a taggable audio file")
    if audiofile.tag is None:
        # A file without any existing tag has tag == None; create an empty one.
        audiofile.initTag()

    tag = audiofile.tag
    tag.artist = gbs.get("band")
    # NOTE(review): guess_band_song only returns "band" and "song", so this
    # lookup yields None - confirm whether the album should be set at all.
    tag.album = gbs.get("album")
    tag.album_artist = gbs.get("band")
    tag.title = gbs.get("song")
    tag.save()
def save_mp3(video_url):
    """Download a video's audio as a 192 kbps MP3 and write its ID3 tags.

    Args:
        video_url: URL of the video to download.

    Returns:
        A ``(title, audio_file)`` tuple: the video title reported by
        youtube-dl and the name of the MP3 file, derived from the title the
        same way the ``outtmpl`` option names the download.

    Raises:
        ValueError: If *video_url* is empty.
        RuntimeError: If youtube-dl returns no information for the video.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not video_url:
        raise ValueError("Missing video, can't continue")

    ytdl_opts = {
        "quiet": True,
        # "restrictfilenames": True,
        "writethumbnail": False,
        "ignoreerrors": True,
        "geo_bypass": True,
        "outtmpl": "%(title)s.%(ext)s",
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
    }

    youtube_dl.utils.std_headers["User-Agent"] = USER_AGENT
    with youtube_dl.YoutubeDL(ytdl_opts) as ydl:
        # One call both extracts the metadata and downloads, so the video
        # page is not fetched twice.
        ytdl_info = ydl.extract_info(video_url, download=True)

    if not ytdl_info:
        # With "ignoreerrors" enabled, failures surface as a missing result
        # rather than an exception.
        raise RuntimeError(f"Could not retrieve video information for {video_url}")

    title = ytdl_info.get("title")
    # Mirror the "%(title)s.%(ext)s" template with the post-processed extension.
    audio_file = f"{sanitize_filename(title, False, False)}.mp3"
    save_id3(title, audio_file)
    return title, audio_file
def resolve_video_url(video: str) -> str:
    """Return *video* as a URL, treating a non-URL value as a YouTube video id.

    Values that already start with ``http://`` or ``https://`` are returned
    unchanged; anything else is inserted into a YouTube watch URL.
    """
    if video.startswith(("http://", "https://")):
        return video
    return f"https://www.youtube.com/watch?v={video}"


def main(argv=None) -> None:
    """Command-line entry point: download the given video as a tagged MP3.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    # nargs="?" makes the positional optional so the empty default, and with
    # it the help fallback below, can actually be reached.
    parser.add_argument("video", type=str, nargs="?", default="", help="Video URL")
    args = parser.parse_args(argv)

    if not args.video:
        parser.print_help()
        return

    title, output = save_mp3(resolve_video_url(args.video))
    print(f"Title: {title}\nFile: {output}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment