Skip to content

Instantly share code, notes, and snippets.

@rainbowbreeze
Last active August 29, 2015 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rainbowbreeze/074cecfde5cd98a7c9eb to your computer and use it in GitHub Desktop.
Save rainbowbreeze/074cecfde5cd98a7c9eb to your computer and use it in GitHub Desktop.
from __future__ import unicode_literals
from pydub import AudioSegment
import youtube_dl
import logging
import os
import sys
def create_desc_file(url):
    """Ask youtube_dl to write the description file of a YT video.

    The media itself is not fetched: the options request the description
    and skip the download.
    url can be a string like 'https://www.youtube.com/watch?v=QcfIroNI3NU'
    """

    class MyLogger(object):
        """Logger that drops debug/warning messages and prints errors.

        Only referenced by the disabled 'logger' option below.
        """

        def debug(self, msg):
            pass

        def warning(self, msg):
            pass

        def error(self, msg):
            print(msg)

    def my_hook(d):
        # Progress callback, only referenced by the disabled
        # 'progress_hooks' option below
        if d['status'] == 'finished':
            print('Done downloading, now converting ...')

    # Options left commented out are the ones needed for a real audio
    # download/conversion; re-enable them if that is what you want
    options = {
        'writedescription': True,
        'skip_download': True,
        #'format': 'bestaudio/best',
        #'postprocessors': [{
        #    'key': 'FFmpegExtractAudio',
        #    'preferredcodec': 'mp3',
        #    'preferredquality': '192',
        #}],
        #'logger': MyLogger(),
        #'progress_hooks': [my_hook],
    }
    targets = [url]
    with youtube_dl.YoutubeDL(options) as downloader:
        downloader.download(targets)
def save_segment(audiosegment, output_dir, start_sec, end_sec, bitrate, artist, title, source_url):
    """Saves a segment of an audio file to a mp3 file.

    audiosegment: the whole audio; it is sliced by milliseconds and the
        slice is saved through its export() method (main() passes a pydub
        AudioSegment)
    output_dir: directory where the mp3 is written
    start_sec: start of the segment, in seconds
    end_sec: end of the segment in seconds, or -1 to cut up to the end of
        the audio
    bitrate: forwarded to export(), for example "192k"
    artist, title: used both for the tags and for the file name,
        "<artist> - <title>.mp3"
    source_url: stored in the 'comments' tag
    """
    file_name = "%s - %s.mp3" % (artist, title)
    # A path separator inside artist/title (e.g. "AC/DC") would make
    # os.path.join point into a sub-directory that does not exist, so it is
    # replaced in the file name only; the tags keep the original text
    for separator in (os.sep, os.altsep):
        if separator:
            file_name = file_name.replace(separator, "_")
    print(" Cutting from %d to %d secs and saving to file %s" % (start_sec, end_sec, file_name))
    final_file_name = os.path.join(output_dir, file_name)

    if end_sec == -1:
        # Final time is not provided, split from the start to the end of the audio file
        finalsound = audiosegment[start_sec * 1000:]
    else:
        finalsound = audiosegment[start_sec * 1000:end_sec * 1000]

    #TODO comments doesn't work :(
    finalsound.export(
        final_file_name,
        format="mp3",
        bitrate=bitrate,
        tags={'artist': artist, 'title': title, 'comments': source_url},
    )
def get_secs_from_string(s):
    """Transform hh:mm:ss or mm:ss strings to their seconds representation.

    Returns the seconds that represent the given string, or -1 if the
    string is None.
    Raises ValueError when one of the ':' separated fields is not an integer.
    """
    if s is None:
        return -1
    secs = 0
    # Every field is worth 60 times the one on its right, so multiplying the
    # running total by 60 before adding the next field gives the same sum as
    # weighting each field by a power of 60 (and needs no xrange, which does
    # not exist on Python 3)
    for field in s.split(':'):
        secs = secs * 60 + int(field)
    return secs
def read_tracks(file_name, parser_func):
    """Parses the description file with all the tracks time, title and author.

    Different formats of the desc row are managed by different parser_func.

    file_name: path of the description file, one track per row
    parser_func: called with every non-blank row (newline included); it must
        return a dict with the 'time', 'author' and 'title' keys
    Returns a list with one dict per track, with the keys 'start', 'end',
    'author' and 'title'. 'end' is the 'time' of the following track, or
    None for the last track.
    """
    # Parses track info and positions from the desc file
    file_rows = []
    print("Parsing tracks information from file " + file_name)
    with open(file_name, 'r') as f:
        for line in f:
            # Using write instead of print to skip the automatically added newline char
            sys.stdout.write(" " + line)
            if not line.strip():
                # A blank row holds no track; parsing it would produce an
                # empty time that cannot be converted to seconds later
                continue
            file_rows.append(parser_func(line))

    # Builds the final data for splitting tracks
    tracks_info = []
    print("")
    print("Creating tracks list from parsed information")
    last_index = len(file_rows) - 1
    for i, data in enumerate(file_rows):
        print(" #%s# - #%s# - #%s#" % (data['time'], data['author'], data['title']))
        # A track ends where the next one starts; the last one has no end
        end = file_rows[i + 1]['time'] if i < last_index else None
        tracks_info.append({
            'start': data['time'],
            'end': end,
            'author': data['author'],
            'title': data['title'],
        })
    return tracks_info
def parser_one(line):
    """Parses string like 14:52 Immediate Music - Falling Into Inertia

    first run of whitespace separates time from other info
    first dash separates author from title

    Returns a dict with the 'time', 'author' and 'title' keys. A row with
    no dash gets an empty author and the whole description as title; a row
    made of a single word is returned as the time, with empty author and
    title.
    """
    # split(None, 1) ignores leading/trailing whitespace and accepts tabs,
    # and never yields the -1 index that find() returns for a missing space
    fields = line.split(None, 1)
    time = fields[0] if fields else ''
    desc = fields[1].strip() if len(fields) > 1 else ''

    author, dash, title = desc.partition('-')
    if not dash:
        # No author/title separator: keep the text as title rather than
        # cutting its last character off
        author, title = '', desc
    return {'time': time, 'author': author.strip(), 'title': title.strip()}
def parser_two(line):
    """Parses string like Immediate Music - Falling Into Inertia 14:52

    last run of whitespace separates time from other info
    first dash separates author from title

    Returns a dict with the 'time', 'author' and 'title' keys. A row with
    no dash gets an empty author and the whole description as title; a row
    made of a single word is returned as the time, with empty author and
    title.
    """
    # rsplit(None, 1) drops trailing blanks/newline before looking for the
    # last separator, so a space after the time no longer hides it
    fields = line.rsplit(None, 1)
    time = fields[-1] if fields else ''
    desc = fields[0].strip() if len(fields) > 1 else ''

    author, dash, title = desc.partition('-')
    if not dash:
        # No author/title separator: keep the text as title rather than
        # cutting its last character off
        author, title = '', desc
    return {'time': time, 'author': author.strip(), 'title': title.strip()}
def enable_pydub_logging():
    """Turn on DEBUG output of the "pydub.converter" logger.

    A StreamHandler is attached to the logger and its level is set to DEBUG.
    """
    converter_logger = logging.getLogger("pydub.converter")
    converter_logger.addHandler(logging.StreamHandler())
    converter_logger.setLevel(logging.DEBUG)
def main():
    """Splits a file downloaded with youtube-dl into one mp3 per track.

    The script is meant to be edited by hand between runs: follow the
    numbered steps in the comments below. As shipped it only prints a
    reminder and returns (step 5).
    """
    # More logging, in case of errors
    #enable_pydub_logging()

    # How to use this script
    #
    # Prerequisites
    #  $ pip install youtube-dl
    #  $ pip install pydub
    #
    # 1) Insert here the YT video URL
    YOUTUBE_URL = "https://www.youtube.com/watch?v=QcfIroNI3NU"
    # 2) Download the file you wanna split using
    #  $ youtube-dl -x %youtube_url%
    # 3) Check the maximum available bitrate of m4a_dash formats container using
    #  $ youtube-dl -F %youtube_url%
    BITRATE = "192k"
    # 4) Insert here the file name saved by youtube-dl, without extension
    FILENAME = "Epic Hits _ The Best of Epic Music 2014 - 1-Hour Full Cinematic - Epic Music Vn-QcfIroNI3NU"
    # 5) Comment the following two lines
    print("It seems you haven't read the instruction to run this script. Please do, they are in the source")
    return
    # 6) Now decomment the following three lines and run this script once
    ## Creates the .description file, it need to be edited!
    # create_desc_file(YOUTUBE_URL)
    # return
    # 7) Comment again the previous three lines. Then edit the description file
    #    created by the app, putting duration, author and track name in a format
    #    that the parser can understand
    # 8) Run again this script, the following part will be executed
    #    Change the parser passed to read_tracks method if the description
    #    file has a different format
    description_file = FILENAME + ".description"
    if not os.path.isfile(description_file):
        print("Description file %s doesn't exists" % (description_file))
        return

    # Please check the file extension
    downloaded_file = FILENAME + ".m4a"
    if not os.path.isfile(downloaded_file):
        print("Audio file %s doesn't exists" % (downloaded_file))
        return

    # The tracks are saved in a directory named after the downloaded file
    if not os.path.exists(FILENAME):
        os.makedirs(FILENAME)

    tracks_info = read_tracks(description_file, parser_one)
    print("")
    print("Now splitting the audio file in directory " + FILENAME)
    full_song = AudioSegment.from_file(downloaded_file)
    for track in tracks_info:
        save_segment(
            full_song,
            FILENAME,
            get_secs_from_string(track['start']),
            # The last track has no end time: get_secs_from_string(None) is -1,
            # which save_segment reads as "up to the end of the audio"
            get_secs_from_string(track['end']),
            BITRATE,
            track['author'],
            track['title'],
            YOUTUBE_URL
        )
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment