# Gist by @gu-ma (gu-ma/300eb77ed45f0d6cd1822bcdcfdbd979), last active March 7, 2017 22:08.
# Parse MIDI files, convert them to ABC, and split instruments into different files.
import os
import sys
import subprocess
# from collections import Counter
# import re
# Command-line arguments: <walk_dir> <out_dir> <set_name>
#   walk_dir - directory tree that is searched recursively for .mid files
#   out_dir  - directory that receives every generated .txt file
#   set_name - prefix shared by all output file names
# Exit with a usage message rather than a bare IndexError when an argument
# is missing.
if len(sys.argv) < 4:
    sys.exit('usage: ' + os.path.basename(sys.argv[0]) +
             ' <walk_dir> <out_dir> <set_name>')
walk_dir = sys.argv[1]
out_dir = sys.argv[2]
set_name = sys.argv[3]
print('walk_dir = ' + walk_dir)
print('out_dir = ' + out_dir)
print('set_name = ' + set_name)

# Output file holding all the converted abc files, concatenated
abc_file_path = os.path.join(out_dir, set_name + '.txt')
# "Clean" copy of the concatenated file, with unwanted lines filtered out
abc_file_path_clean = os.path.join(out_dir, set_name + '_clean.txt')
# Browse all folders and sub folders of walk_dir and append the midi2abc
# conversion of every .mid file to the concatenated output file.
#
# The output file is opened once, in binary append mode: Popen hands back the
# converter's stdout as bytes, which a text-mode file rejects on Python 3, and
# writing the bytes untouched avoids guessing the encoding of the MIDI text.
with open(abc_file_path, 'ab') as abc_file:
    for root, subdirs, files in os.walk(walk_dir):
        print('--\nroot = ' + root)
        for subdir in subdirs:
            print('\t- subdirectory ' + subdir)
        for filename in files:
            print('\t- file %s ' % filename)
            if not filename.lower().endswith('.mid'):
                continue
            file_path = os.path.join(root, filename)
            # The external midi2abc tool prints the ABC transcription on
            # stdout; stderr is not captured and goes to the console.
            p = subprocess.Popen(
                ["midi2abc", file_path], stdout=subprocess.PIPE)
            output, _ = p.communicate()
            abc_file.write(output)
# Cleanup: copy the concatenated file to the "clean" file, dropping every
# line that contains one of the substrings in word_list.
# A stricter variant that also drops the ABC header fields would be:
# ['Error', '% ', 'X:', 'T:', 'M:', 'L:', 'Q:', 'V:', 'w:', 'x8| \\\n', 'x8|\n']
# Current filter: comments, errors and some silences (bars made of 'x8').
word_list = ['Error', '% ', 'x8| \\\n', 'x8|\n']
# Plain 'r' is used because the 'U' flag was removed in Python 3.11; text
# mode already translates newlines on Python 3.
with open(abc_file_path, 'r') as oldfile, \
        open(abc_file_path_clean, 'w') as newfile:
    for line in oldfile:
        if any(word in line for word in word_list):
            continue
        newfile.write(line)
# Separate instruments in different files.
# Each entry maps a family name to its zero-based General MIDI program
# numbers (0-127), eight programs per GM family:
# http://soundprogramming.net/file-formats/general-midi-instrument-list/
# range() excludes its stop value, so every stop is the first program of the
# NEXT family; together the ranges cover all 128 programs exactly once.
instruments = [
    # Drums own no program numbers: they use a different midi notation and
    # are recognised through "MIDI channel 10" instead.
    ('drum', range(0)),
    ('piano', range(0, 8)),
    ('percussion', range(8, 16)),      # chromatic percussion
    ('organ', range(16, 24)),
    ('guitar', range(24, 32)),
    ('bass', range(32, 40)),
    ('strings', range(40, 56)),        # strings + ensemble
    ('brass', range(56, 64)),
    ('reed', range(64, 72)),
    ('pipe', range(72, 80)),
    ('synth_lead', range(80, 104)),    # synth lead + synth pad + synth effects
    ('ethnic', range(104, 112)),
    ('percussive', range(112, 120)),
    ('effects', range(120, 128)),      # sound effects
]
# print("instruments :" + str(instruments))
# abc_text = [ "" for i in range(len(instruments))]
# One text buffer per instrument family, keyed by family name and initially
# empty.
abc_text = dict((family, "") for family, _ in instruments)
print("abc_text :%s" % (abc_text,))
# Count the number of lines in the cleaned output file.
# Summing over the file yields the true line count and is defined (0) for an
# empty file, whereas keeping the last enumerate() index gives count - 1 and
# leaves the name unbound when the file has no lines.
with open(abc_file_path_clean) as f:
    line_total = sum(1 for _ in f)
print(line_total)
# Split the cleaned ABC text into one buffer per instrument family.
#
# The file is read once. A "V:" line starts a new track and clears the
# current instrument. A "MIDI channel 10" line marks the track as drums, which
# then stays drums until the next "V:" line. Otherwise a "MIDI program N" line
# selects the family that owns program N. Every other line read while an
# instrument is active (and that does not mention "MIDI") is appended to that
# instrument's buffer in abc_text.


def _midi_program(text):
    """Return the number following "MIDI program " in text, or None.

    The whole number is parsed, so "MIDI program 15" yields 15 and is never
    mistaken for program 1 the way a substring test would.
    """
    marker = "MIDI program "
    start = text.find(marker)
    if start < 0:
        return None
    digits = ""
    for char in text[start + len(marker):]:
        if char not in "0123456789":
            break
        digits += char
    return int(digits) if digits else None


# Program number -> family name, built once so each line costs one lookup.
# When two ranges share a program, the later entry of `instruments` wins.
program_families = {}
for name, midi_range in instruments:
    for program in midi_range:
        program_families[program] = name

# Lines are gathered in lists and joined once at the end, which avoids
# repeated string concatenation on large inputs.
collected = {}
instrument_name = ""
with open(abc_file_path_clean, 'r') as f:
    for line_count, line in enumerate(f):
        # Start of a new track: set the instrument_name to nothing
        if "V:" in line:
            instrument_name = ""
        # While reading the lines of an instrument, save the line
        elif instrument_name != "" and "MIDI" not in line:
            collected.setdefault(instrument_name, []).append(line)
        # Exception for drums
        if "MIDI channel 10" in line:
            instrument_name = 'drum'
        # If the line contains an instrument index, set the instrument_name;
        # a number that belongs to no family leaves it unchanged
        elif instrument_name != 'drum':
            instrument_name = program_families.get(
                _midi_program(line), instrument_name)
        # Progress (guarded so a zero total cannot divide by zero)
        pct = 100 * line_count // line_total if line_total else 100
        print(str(pct) + " % of " + str(line_count) + " / " +
              str(line_total) + " " + instrument_name)

for name, parts in collected.items():
    abc_text[name] += "".join(parts)
# Dump each instrument buffer to <out_dir>/<set_name>_<instrument>.txt,
# announcing every file as it is written.
for key, value in abc_text.items():
    abc_file_path = os.path.join(out_dir, '%s_%s.txt' % (set_name, key))
    with open(abc_file_path, 'w') as f:
        print("writing: %s" % abc_file_path)
        f.write(value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment