Skip to content

Instantly share code, notes, and snippets.

@dansuh17
Created November 15, 2017 09:18
Show Gist options
  • Save dansuh17/f7c4f8aa8e43c06d01e470f5d675f155 to your computer and use it in GitHub Desktop.
Save dansuh17/f7c4f8aa8e43c06d01e470f5d675f155 to your computer and use it in GitHub Desktop.
Convert GTZAN dataset to mp3 files.
import subprocess
import os
def convert_gtzan_to_mp3(gtzan_folder_path, output_folder_name):
    """Convert every file under ``gtzan_folder_path`` to mp3 with ffmpeg.

    The directory tree below ``gtzan_folder_path`` is mirrored below
    ``output_folder_name``; each input file is written to the matching
    output folder with its extension replaced by ``.mp3``.

    Args:
        gtzan_folder_path: Root folder that is walked recursively for
            input audio files.
        output_folder_name: Root folder that receives the mp3 files.
            Missing folders are created on demand.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status, or (with return code 127) if the ``ffmpeg``
            executable cannot be found.
    """
    for dirpath, _dirnames, filenames in os.walk(gtzan_folder_path):
        if not filenames:
            continue

        # Mirror the sub-folder relative to the input root. A relative
        # path (rather than str.replace) only ever rewrites the leading
        # root, even if the root's text happens to recur deeper in the path.
        relative_dir = os.path.relpath(dirpath, gtzan_folder_path)
        output_folderpath = os.path.normpath(
            os.path.join(output_folder_name, relative_dir))

        if not os.path.isdir(output_folderpath):
            print('creating folder : {}'.format(output_folderpath))
            # exist_ok avoids failing if the folder appears between the
            # check above and this call.
            os.makedirs(output_folderpath, exist_ok=True)

        for filename in filenames:
            input_filepath = os.path.join(dirpath, filename)
            # splitext handles any extension length (.au, .wav, ...).
            stem, _extension = os.path.splitext(filename)
            output_filename = os.path.join(output_folderpath, stem + '.mp3')

            print('converting : {}'.format(filename))

            # ffmpeg options:
            #   -y        overwrite any existing output file
            #   -i        input file
            #   -codec:a  audio codec to use (libmp3lame)
            #   -b:a      audio bitrate (128k)
            # Passing an argument list (no shell) keeps paths containing
            # spaces or shell metacharacters intact.
            command = [
                'ffmpeg', '-y',
                '-i', input_filepath,
                '-codec:a', 'libmp3lame',
                '-b:a', '128k',
                output_filename,
            ]
            try:
                # check=True raises CalledProcessError on a non-zero exit.
                subprocess.run(command, check=True)
            except FileNotFoundError as err:
                # A shell would report a missing executable as exit
                # status 127; keep surfacing it as CalledProcessError so
                # existing callers see the same exception type.
                raise subprocess.CalledProcessError(127, command) from err
if __name__ == '__main__':
    # Both paths are relative, so run this script from the directory
    # that contains the gtzan_au folder.
    source_root = './gtzan_au'
    target_root = 'gtzan_mp3'
    convert_gtzan_to_mp3(source_root, target_root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment