# rainbowbreeze/split_youtube_mix.py

## split_youtube_mix.py
from __future__ import unicode_literals
from pydub import AudioSegment
import youtube_dl
import logging
import os
import sys

def create_desc_file(url):
    """Write the video's description file without downloading any media.

    youtube-dl is run with 'writedescription' enabled and 'skip_download'
    set, so the description is the only thing requested. The name of the
    resulting file is chosen by youtube-dl, not by this function.

    url can be a string like 'https://www.youtube.com/watch?v=QcfIroNI3NU'
    """
    ydl_opts = {
        # Ask for the description file...
        'writedescription': True,
        # ...and nothing else: the audio is downloaded separately
        'skip_download': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])


def save_segment(audiosegment, output_dir, start_sec, end_sec, bitrate, artist, title, source_url):
    """Cut one track out of the full audio and export it as an mp3 file.

    audiosegment -- the whole mix; it must support slicing in milliseconds
                    and an export() method (a pydub AudioSegment in this script)
    output_dir   -- directory that receives the mp3 file
    start_sec    -- where the track starts, in seconds
    end_sec      -- where the track ends, in seconds, or -1 to cut up to the
                    end of the audio
    bitrate      -- bitrate handed to the exporter, for example "192k"
    artist       -- used in the file name and in the 'artist' tag
    title        -- used in the file name and in the 'title' tag
    source_url   -- stored in the 'comments' tag

    The file is named "<artist> - <title>.mp3"; path separators found in the
    names are replaced with an underscore.
    """
    file_name = "%s - %s.mp3" % (artist, title)
    # A separator inside a name (e.g. "AC/DC") would make os.path.join point
    # to a sub-directory that does not exist, so the export would fail
    for separator in ('/', '\\'):
        file_name = file_name.replace(separator, '_')
    print("  Cutting from %d to %d secs and saving to file %s" % (start_sec, end_sec, file_name))
    final_file_name = os.path.join(output_dir, file_name)

    # Slices are in milliseconds; a missing end means "up to the end of the audio"
    start_ms = start_sec * 1000
    end_ms = None if end_sec == -1 else end_sec * 1000
    finalsound = audiosegment[start_ms:end_ms]

    #TODO comments doesn't work :(
    finalsound.export(
        final_file_name,
        format="mp3",
        bitrate=bitrate,
        tags={'artist': artist, 'title': title, 'comments': source_url},
    )


def get_secs_from_string(s):
    """Transform hh:mm:ss, mm:ss or ss strings to their seconds representation.

    Returns the seconds that represent the given string, or -1 if the string
    is None. A field that is not an integer makes int() raise ValueError.
    """
    if s is None:
        return -1
    secs = 0
    # Every field is worth 60 times the one on its right, so the running
    # total is multiplied by 60 before the next field is added
    for field in s.split(':'):
        secs = secs * 60 + int(field)
    return secs


def read_tracks(file_name, parser_func):
    """Parses the description file with all the tracks time, title and author

    file_name   -- path of the description file, read as UTF-8 text
    parser_func -- called with every non-blank row; it must return a dict
                   with the 'time', 'author' and 'title' keys. Different
                   formats of the row are managed by different parser_func

    Returns a list with one dict per track, in file order, with the 'start',
    'end', 'author' and 'title' keys. 'end' is the 'time' of the following
    track, or None for the last one.
    """
    # Local import: io.open accepts an explicit encoding on Python 2 and 3
    import io

    print("Parsing tracks information from file " + file_name)
    file_rows = []
    with io.open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # A blank row has no track in it and would only produce an
                # empty timestamp
                continue
            print("  " + line.rstrip('\r\n'))
            file_rows.append(parser_func(line))

    # Builds the final data for splitting tracks
    print("")
    print("Creating tracks list from parsed information")
    tracks_info = []
    last_index = len(file_rows) - 1
    for index, data in enumerate(file_rows):
        print("  #%s# - #%s# - #%s#" % (data['time'], data['author'], data['title']))
        tracks_info.append({
            'start': data['time'],
            # A track ends where the next one starts; the last one is open-ended
            'end': file_rows[index + 1]['time'] if index < last_index else None,
            'author': data['author'],
            'title': data['title'],
        })
    return tracks_info

def parser_one(line):
    """Parses string like 14:52 Immediate Music - Falling Into Inertia

    The first run of whitespace separates the time from the other info.
    The first ' - ' separates author from title; when there is no spaced
    dash, the first '-' is used instead. Without any dash the whole text is
    the title and the author is empty.

    Returns a dict with the 'time', 'author' and 'title' keys.
    """
    fields = line.split(None, 1)
    timestamp = fields[0] if fields else ''
    desc = fields[1].strip() if len(fields) > 1 else ''

    # Looking for the spaced dash first keeps hyphenated names, like
    # "X-Ray Dog", in one piece
    separator = ' - ' if ' - ' in desc else '-'
    author, found, title = desc.partition(separator)
    if not found:
        author, title = '', desc

    return {'time': timestamp, 'author': author.strip(), 'title': title.strip()}

def parser_two(line):
    """Parses string like Immediate Music - Falling Into Inertia 14:52

    The last run of whitespace separates the time from the other info;
    whitespace after the time is ignored.
    The first ' - ' separates author from title; when there is no spaced
    dash, the first '-' is used instead. Without any dash the whole text is
    the title and the author is empty.

    Returns a dict with the 'time', 'author' and 'title' keys.
    """
    fields = line.rsplit(None, 1)
    timestamp = fields[-1] if fields else ''
    # With a single field there is only the time and no description
    desc = fields[0].strip() if len(fields) > 1 else ''

    # Looking for the spaced dash first keeps hyphenated names, like
    # "X-Ray Dog", in one piece
    separator = ' - ' if ' - ' in desc else '-'
    author, found, title = desc.partition(separator)
    if not found:
        author, title = '', desc

    return {'time': timestamp, 'author': author.strip(), 'title': title.strip()}

def enable_pydub_logging():
    """Turn on the debug output of the pydub converter.

    The "pydub.converter" logger is set to the DEBUG level and gets a new
    stream handler attached.
    """
    converter_logger = logging.getLogger("pydub.converter")
    converter_logger.addHandler(logging.StreamHandler())
    converter_logger.setLevel(logging.DEBUG)

def main():
    """Split a downloaded YouTube mix into one mp3 file for every track.

    The script is configured by editing this function: follow the numbered
    steps below, in order.

    Prerequisites
      $ pip install youtube-dl
      $ pip install pydub
    """
    # More logging, in case of errors
    #enable_pydub_logging()

    # 1) Insert here the YT video URL
    YOUTUBE_URL = "https://www.youtube.com/watch?v=QcfIroNI3NU"

    # 2) Download the file you want to split using
    #    $ youtube-dl -x %youtube_url%

    # 3) Check the maximum available bitrate of the m4a_dash format container using
    #    $ youtube-dl -F %youtube_url%
    BITRATE = "192k"

    # 4) Insert here the file name saved by youtube-dl, without extension
    FILENAME = "Epic Hits _ The Best of Epic Music 2014 - 1-Hour Full Cinematic - Epic Music Vn-QcfIroNI3NU"

    # 5) Comment out the following two lines. They stop the script on purpose
    #    until the steps above have been done.
    print("It seems you haven't read the instruction to run this script. Please do, they are in the source")
    return

    # 6) Now uncomment the following two lines and run this script once.
    #    They create the .description file, which needs to be edited!
    # create_desc_file(YOUTUBE_URL)
    # return

    # 7) Comment out the previous two lines again. Then edit the description
    #    file created by the app, putting time, author and track name in a
    #    format that the parser can understand

    # 8) Run this script again, the following part will be executed.
    #    Change the parser passed to read_tracks if the description file has
    #    a different format

    description_file = FILENAME + ".description"
    if not os.path.isfile(description_file):
        print("Description file %s doesn't exists" % (description_file))
        return
    # Please check the file extension
    downloaded_file = FILENAME + ".m4a"
    if not os.path.isfile(downloaded_file):
        print("Audio file %s doesn't exists" % (downloaded_file))
        return

    # The tracks are saved in a directory named like the downloaded file
    if not os.path.exists(FILENAME):
        os.makedirs(FILENAME)

    tracks_info = read_tracks(description_file, parser_one)

    print("")
    print("Now splitting the audio file in directory " + FILENAME)
    full_song = AudioSegment.from_file(downloaded_file)
    for track in tracks_info:
        save_segment(
            full_song,
            FILENAME,
            get_secs_from_string(track['start']),
            get_secs_from_string(track['end']),
            BITRATE,
            track['author'],
            track['title'],
            YOUTUBE_URL
        )


# Run the splitter only when this file is executed as a script, not when it
# is imported as a module
if __name__ == "__main__":
    main()
	from __future__ import unicode_literals
	from pydub import AudioSegment
	import youtube_dl
	import logging
	import os
	import sys

	def create_desc_file(url):
		"""Write the video's description file without downloading any media.

		youtube-dl is run with 'writedescription' enabled and 'skip_download'
		set, so the description is the only thing requested. The name of the
		resulting file is chosen by youtube-dl, not by this function.

		url can be a string like 'https://www.youtube.com/watch?v=QcfIroNI3NU'
		"""
		ydl_opts = {
			# Ask for the description file...
			'writedescription': True,
			# ...and nothing else: the audio is downloaded separately
			'skip_download': True,
		}
		with youtube_dl.YoutubeDL(ydl_opts) as ydl:
			ydl.download([url])


	def save_segment(audiosegment, output_dir, start_sec, end_sec, bitrate, artist, title, source_url):
		"""Cut one track out of the full audio and export it as an mp3 file.

		audiosegment -- the whole mix; it must support slicing in milliseconds
		                and an export() method (a pydub AudioSegment in this script)
		output_dir   -- directory that receives the mp3 file
		start_sec    -- where the track starts, in seconds
		end_sec      -- where the track ends, in seconds, or -1 to cut up to the
		                end of the audio
		bitrate      -- bitrate handed to the exporter, for example "192k"
		artist       -- used in the file name and in the 'artist' tag
		title        -- used in the file name and in the 'title' tag
		source_url   -- stored in the 'comments' tag

		The file is named "<artist> - <title>.mp3"; path separators found in the
		names are replaced with an underscore.
		"""
		file_name = "%s - %s.mp3" % (artist, title)
		# A separator inside a name (e.g. "AC/DC") would make os.path.join point
		# to a sub-directory that does not exist, so the export would fail
		for separator in ('/', '\\'):
			file_name = file_name.replace(separator, '_')
		print("  Cutting from %d to %d secs and saving to file %s" % (start_sec, end_sec, file_name))
		final_file_name = os.path.join(output_dir, file_name)

		# Slices are in milliseconds; a missing end means "up to the end of the audio"
		start_ms = start_sec * 1000
		end_ms = None if end_sec == -1 else end_sec * 1000
		finalsound = audiosegment[start_ms:end_ms]

		#TODO comments doesn't work :(
		finalsound.export(
			final_file_name,
			format="mp3",
			bitrate=bitrate,
			tags={'artist': artist, 'title': title, 'comments': source_url},
		)


	def get_secs_from_string(s):
		"""Transform hh:mm:ss, mm:ss or ss strings to their seconds representation.

		Returns the seconds that represent the given string, or -1 if the string
		is None. A field that is not an integer makes int() raise ValueError.
		"""
		if s is None:
			return -1
		secs = 0
		# Every field is worth 60 times the one on its right, so the running
		# total is multiplied by 60 before the next field is added
		for field in s.split(':'):
			secs = secs * 60 + int(field)
		return secs


	def read_tracks(file_name, parser_func):
		"""Parses the description file with all the tracks time, title and author

		file_name   -- path of the description file, read as UTF-8 text
		parser_func -- called with every non-blank row; it must return a dict
		               with the 'time', 'author' and 'title' keys. Different
		               formats of the row are managed by different parser_func

		Returns a list with one dict per track, in file order, with the 'start',
		'end', 'author' and 'title' keys. 'end' is the 'time' of the following
		track, or None for the last one.
		"""
		# Local import: io.open accepts an explicit encoding on Python 2 and 3
		import io

		print("Parsing tracks information from file " + file_name)
		file_rows = []
		with io.open(file_name, 'r', encoding='utf-8') as f:
			for line in f:
				if not line.strip():
					# A blank row has no track in it and would only produce an
					# empty timestamp
					continue
				print("  " + line.rstrip('\r\n'))
				file_rows.append(parser_func(line))

		# Builds the final data for splitting tracks
		print("")
		print("Creating tracks list from parsed information")
		tracks_info = []
		last_index = len(file_rows) - 1
		for index, data in enumerate(file_rows):
			print("  #%s# - #%s# - #%s#" % (data['time'], data['author'], data['title']))
			tracks_info.append({
				'start': data['time'],
				# A track ends where the next one starts; the last one is open-ended
				'end': file_rows[index + 1]['time'] if index < last_index else None,
				'author': data['author'],
				'title': data['title'],
			})
		return tracks_info

	def parser_one(line):
		"""Parses string like 14:52 Immediate Music - Falling Into Inertia

		The first run of whitespace separates the time from the other info.
		The first ' - ' separates author from title; when there is no spaced
		dash, the first '-' is used instead. Without any dash the whole text is
		the title and the author is empty.

		Returns a dict with the 'time', 'author' and 'title' keys.
		"""
		fields = line.split(None, 1)
		timestamp = fields[0] if fields else ''
		desc = fields[1].strip() if len(fields) > 1 else ''

		# Looking for the spaced dash first keeps hyphenated names, like
		# "X-Ray Dog", in one piece
		separator = ' - ' if ' - ' in desc else '-'
		author, found, title = desc.partition(separator)
		if not found:
			author, title = '', desc

		return {'time': timestamp, 'author': author.strip(), 'title': title.strip()}

	def parser_two(line):
		"""Parses string like Immediate Music - Falling Into Inertia 14:52

		The last run of whitespace separates the time from the other info;
		whitespace after the time is ignored.
		The first ' - ' separates author from title; when there is no spaced
		dash, the first '-' is used instead. Without any dash the whole text is
		the title and the author is empty.

		Returns a dict with the 'time', 'author' and 'title' keys.
		"""
		fields = line.rsplit(None, 1)
		timestamp = fields[-1] if fields else ''
		# With a single field there is only the time and no description
		desc = fields[0].strip() if len(fields) > 1 else ''

		# Looking for the spaced dash first keeps hyphenated names, like
		# "X-Ray Dog", in one piece
		separator = ' - ' if ' - ' in desc else '-'
		author, found, title = desc.partition(separator)
		if not found:
			author, title = '', desc

		return {'time': timestamp, 'author': author.strip(), 'title': title.strip()}

	def enable_pydub_logging():
		"""Turn on the debug output of the pydub converter.

		The "pydub.converter" logger is set to the DEBUG level and gets a new
		stream handler attached.
		"""
		converter_logger = logging.getLogger("pydub.converter")
		converter_logger.setLevel(logging.DEBUG)
		converter_logger.addHandler(logging.StreamHandler())

	def main():
		"""Split a downloaded YouTube mix into one mp3 file for every track.

		The script is configured by editing this function: follow the numbered
		steps below, in order.

		Prerequisites
		  $ pip install youtube-dl
		  $ pip install pydub
		"""
		# More logging, in case of errors
		#enable_pydub_logging()

		# 1) Insert here the YT video URL
		YOUTUBE_URL = "https://www.youtube.com/watch?v=QcfIroNI3NU"

		# 2) Download the file you want to split using
		#    $ youtube-dl -x %youtube_url%

		# 3) Check the maximum available bitrate of the m4a_dash format container using
		#    $ youtube-dl -F %youtube_url%
		BITRATE = "192k"

		# 4) Insert here the file name saved by youtube-dl, without extension
		FILENAME = "Epic Hits _ The Best of Epic Music 2014 - 1-Hour Full Cinematic - Epic Music Vn-QcfIroNI3NU"

		# 5) Comment out the following two lines. They stop the script on purpose
		#    until the steps above have been done.
		print("It seems you haven't read the instruction to run this script. Please do, they are in the source")
		return

		# 6) Now uncomment the following two lines and run this script once.
		#    They create the .description file, which needs to be edited!
		# create_desc_file(YOUTUBE_URL)
		# return

		# 7) Comment out the previous two lines again. Then edit the description
		#    file created by the app, putting time, author and track name in a
		#    format that the parser can understand

		# 8) Run this script again, the following part will be executed.
		#    Change the parser passed to read_tracks if the description file has
		#    a different format

		description_file = FILENAME + ".description"
		if not os.path.isfile(description_file):
			print("Description file %s doesn't exists" % (description_file))
			return
		# Please check the file extension
		downloaded_file = FILENAME + ".m4a"
		if not os.path.isfile(downloaded_file):
			print("Audio file %s doesn't exists" % (downloaded_file))
			return

		# The tracks are saved in a directory named like the downloaded file
		if not os.path.exists(FILENAME):
			os.makedirs(FILENAME)

		tracks_info = read_tracks(description_file, parser_one)

		print("")
		print("Now splitting the audio file in directory " + FILENAME)
		full_song = AudioSegment.from_file(downloaded_file)
		for track in tracks_info:
			save_segment(
				full_song,
				FILENAME,
				get_secs_from_string(track['start']),
				get_secs_from_string(track['end']),
				BITRATE,
				track['author'],
				track['title'],
				YOUTUBE_URL
			)


	# Run the splitter only when this file is executed as a script, not when it
	# is imported as a module
	if __name__ == "__main__":
		main()