Last active
August 29, 2015 14:25
-
-
Save rainbowbreeze/074cecfde5cd98a7c9eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import unicode_literals | |
from pydub import AudioSegment | |
import youtube_dl | |
import logging | |
import os | |
import sys | |
def create_desc_file(url):
    """Ask youtube_dl to write the description file of a video.

    The options passed to youtube_dl enable 'writedescription' and
    'skip_download', so only the description is requested and the media
    itself is not fetched.

    url can be a string like 'https://www.youtube.com/watch?v=QcfIroNI3NU'
    """

    class MyLogger(object):
        """Logger that drops debug/warning messages and prints errors.

        Only referenced by the disabled 'logger' option below.
        """

        def debug(self, msg):
            pass

        def warning(self, msg):
            pass

        def error(self, msg):
            print(msg)

    def my_hook(d):
        """Progress hook; only referenced by the disabled option below."""
        if d['status'] != 'finished':
            return
        print('Done downloading, now converting ...')

    options = {
        'skip_download': True,
        'writedescription': True,
        # Disabled options, kept as a reference for fetching the audio too:
        #'format': 'bestaudio/best',
        #'postprocessors': [{
        #    'key': 'FFmpegExtractAudio',
        #    'preferredcodec': 'mp3',
        #    'preferredquality': '192',
        #}],
        #'logger': MyLogger(),
        #'progress_hooks': [my_hook],
    }

    with youtube_dl.YoutubeDL(options) as downloader:
        downloader.download([url])
def save_segment(audiosegment, output_dir, start_sec, end_sec, bitrate, artist, title, source_url):
    """Saves a segment of an audio file to a mp3 file.

    audiosegment -- object that can be sliced by milliseconds and whose
                    slices offer an export() method (main() passes a pydub
                    AudioSegment)
    output_dir   -- directory where the mp3 file is written
    start_sec    -- start of the segment, in seconds
    end_sec      -- end of the segment, in seconds, or -1 to cut up to the
                    end of the audio
    bitrate      -- bitrate forwarded to export(), e.g. "192k"
    artist       -- used for the file name and for the 'artist' tag
    title        -- used for the file name and for the 'title' tag
    source_url   -- stored in the 'comments' tag

    The file is named "<artist> - <title>.mp3". Path separators found in the
    artist or in the title are replaced with '_', otherwise a name such as
    "AC/DC" would point inside a sub-directory that doesn't exist and the
    export would fail.
    """
    file_name = "%s - %s.mp3" % (artist, title)
    # os.altsep is None on platforms with a single separator
    for separator in ('/', os.sep, os.altsep):
        if separator:
            file_name = file_name.replace(separator, '_')

    print(" Cutting from %d to %d secs and saving to file %s" % (start_sec, end_sec, file_name))
    final_file_name = os.path.join(output_dir, file_name)

    # A missing end time (-1) becomes an open-ended slice, so the cut runs
    # up to the end of the audio
    end_ms = None if end_sec == -1 else end_sec * 1000
    finalsound = audiosegment[start_sec * 1000:end_ms]

    #TODO comments doesn't work :(
    finalsound.export(
        final_file_name,
        format="mp3",
        bitrate=bitrate,
        tags={'artist': artist, 'title': title, 'comments': source_url},
    )
def get_secs_from_string(s):
    """Transform hh:mm:ss, mm:ss or ss strings to their seconds representation.

    Returns the seconds that represent the given string, or -1 if the string
    is None (save_segment reads -1 as "up to the end of the audio").
    Raises ValueError when one of the colon-separated fields is not an
    integer.
    """
    if s is None:
        return -1
    secs = 0
    # Every field is worth 60 times the one that follows it, so the total
    # can be accumulated left to right without computing powers of 60
    for field in s.split(':'):
        secs = secs * 60 + int(field)
    return secs
def read_tracks(file_name, parser_func):
    """Parses the description file with all the tracks time, title and author.

    file_name   -- path of the description file, one track per row
    parser_func -- function that receives one row and returns a dict with
                   the keys 'time', 'author' and 'title'. A different format
                   of the desc row is managed by a different parser_func

    Returns a list with one dict per track, with the keys 'start', 'end',
    'author' and 'title'. 'end' is the time of the following track, or None
    for the last one.

    Blank rows are skipped: they would otherwise be parsed as a track with
    an empty time.
    """
    file_rows = []
    print("Parsing tracks information from file " + file_name)
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            # Using write instead of print to skip the automatically added newline char
            sys.stdout.write(" " + line)
            file_rows.append(parser_func(line))

    # Builds the final data for splitting tracks
    tracks_info = []
    print("Creating tracks list from parsed information")
    for i, data in enumerate(file_rows):
        print(" #%s# - #%s# - #%s#" % (data['time'], data['author'], data['title']))
        # A track ends where the next one starts; the last one has no end
        if i + 1 < len(file_rows):
            end = file_rows[i + 1]['time']
        else:
            end = None
        tracks_info.append({
            'start': data['time'],
            'end': end,
            'author': data['author'],
            'title': data['title'],
        })
    return tracks_info
def parser_one(line):
    """Parses string like 14:52 Immediate Music - Falling Into Inertia

    first space separates time from other info
    first dash separates author from title

    Returns a dict with the keys 'time', 'author' and 'title', all stripped
    of the surrounding whitespace. When the row has no dash the whole
    description is used as title and the author is left empty; when it has
    no space the whole row is used as time.
    """
    # Stripping first makes leading spaces and the trailing newline harmless
    time_part, _, desc = line.strip().partition(' ')
    author, dash, title = desc.partition('-')
    if not dash:
        author, title = '', desc
    return {
        'time': time_part,
        'author': author.strip(),
        'title': title.strip(),
    }
def parser_two(line):
    """Parses string like Immediate Music - Falling Into Inertia 14:52

    last space separates time from other info
    first dash separates author from title

    Returns a dict with the keys 'time', 'author' and 'title', all stripped
    of the surrounding whitespace. When the row has no dash the whole
    description is used as title and the author is left empty; when it has
    no space the whole row is used as time.
    """
    # Stripping first prevents a trailing space from being taken as the
    # separator placed before the time
    desc, _, time_part = line.strip().rpartition(' ')
    author, dash, title = desc.partition('-')
    if not dash:
        author, title = '', desc
    return {
        'time': time_part,
        'author': author.strip(),
        'title': title.strip(),
    }
def enable_pydub_logging():
    """Enable Pydub logging

    Sets the "pydub.converter" logger to DEBUG and attaches a new
    StreamHandler to it.
    """
    converter_logger = logging.getLogger("pydub.converter")
    converter_logger.setLevel(logging.DEBUG)
    stream_handler = logging.StreamHandler()
    converter_logger.addHandler(stream_handler)
def main():
    """Splits an audio file downloaded with youtube-dl in one mp3 per track.

    The script is configured by editing the values and by commenting or
    uncommenting the code of this function, following the numbered steps
    below. Until step 5 is done, it only prints a reminder and stops.
    """
    # More logging, in case of errors
    #enable_pydub_logging()

    # How to use this script
    #
    # Prerequisites
    #   $ pip install youtube-dl
    #   $ pip install pydub
    #
    # 1) Insert here the YT video URL
    YOUTUBE_URL = "https://www.youtube.com/watch?v=QcfIroNI3NU"
    # 2) Download the file you want to split using
    #   $ youtube-dl -x %youtube_url%
    # 3) Check the maximum available bitrate of the m4a_dash format container using
    #   $ youtube-dl -F %youtube_url%
    BITRATE = "192k"
    # 4) Insert here the file name saved by youtube-dl, without extension
    FILENAME = "Epic Hits _ The Best of Epic Music 2014 - 1-Hour Full Cinematic - Epic Music Vn-QcfIroNI3NU"

    # 5) Comment the following two lines
    print("It seems you haven't read the instruction to run this script. Please do, they are in the source")
    return

    # 6) Now uncomment the following three lines and run this script once
    ## Creates the .description file, it needs to be edited!
    # create_desc_file(YOUTUBE_URL)
    # return

    # 7) Comment again the previous three lines. Then edit the description file
    #    created by the app, putting duration, author and track name in a format
    #    that the parser can understand

    # 8) Run this script again, the following part will be executed.
    #    Change the parser passed to the read_tracks method if the description
    #    file has a different format
    desc_path = FILENAME + ".description"
    if not os.path.isfile(desc_path):
        print("Description file %s doesn't exists" % desc_path)
        return

    # Please check the file extension
    audio_path = FILENAME + ".m4a"
    if not os.path.isfile(audio_path):
        print("Audio file %s doesn't exists" % audio_path)
        return

    # The tracks are saved in a directory named like the downloaded file
    if not os.path.exists(FILENAME):
        os.makedirs(FILENAME)

    tracks_info = read_tracks(desc_path, parser_one)

    print("Now splitting the audio file in directory " + FILENAME)
    whole_audio = AudioSegment.from_file(audio_path)
    for track in tracks_info:
        first_sec = get_secs_from_string(track['start'])
        last_sec = get_secs_from_string(track['end'])
        save_segment(
            whole_audio,
            FILENAME,
            first_sec,
            last_sec,
            BITRATE,
            track['author'],
            track['title'],
            YOUTUBE_URL
        )


# Run the script only when executed directly, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment