Last active
August 1, 2020 01:07
-
-
Save albert-yu/f9f1c5883c1a3f167b314d24259372f3 to your computer and use it in GitHub Desktop.
A supposedly fun thing I'll never do again
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script was used to convert instrumentation data from IMSLP
# into a collection of objects, decoupling the instrument count and
# miscellaneous notes from the long string blob.
import os
import json
import re

# Split on newlines, or on commas that are not inside a parenthesised group
# (the negative lookahead rejects a comma that is followed by a ")" with no
# other parenthesis in between).
split_pattern = r"\n|,\s*(?![^()]*\))"
# One or more digits; applied with .match() so only a leading count is taken.
cnt_prog = re.compile(r"\d+")
# Parenthesised notes, from the first "(" to the last ")" (greedy).
notes_prog = re.compile(r"\(.*\)")


def parse_instrument(part):
    """Parse one instrumentation entry into a dict.

    The entry is expected to look like ``"<count> <name> (<notes>)"`` where
    the count and the notes are both optional.

    Args:
        part: A single entry, e.g. ``"2 flutes (2nd also piccolo)"``.

    Returns:
        A dict with the key ``"name"`` plus ``"count"`` (int) and ``"notes"``
        (str, without the surrounding parentheses) when present, or ``None``
        when no instrument name remains after removing count and notes.
    """
    part = part.strip()
    entry = {}

    # The name spans the whole entry unless a count or notes trim it.
    name_start = 0
    name_end = len(part)

    # Only a number at the very start is a count. Digits elsewhere (e.g.
    # "Violin 2" or "Piano (4 hands)") belong to the name or the notes, and
    # the offset must be relative to `part`, not to the matched digits.
    count_m = cnt_prog.match(part)
    if count_m:
        entry["count"] = int(count_m.group(0))
        name_start = count_m.end()

    notes_m = notes_prog.search(part)
    if notes_m:
        # Drop the enclosing parentheses.
        entry["notes"] = notes_m.group(0)[1:-1]
        # The name ends where the notes start.
        name_end = notes_m.start()

    name = part[name_start:name_end].strip()
    if not name:
        # Ignore entries without an instrument name.
        return None
    entry["name"] = name
    return entry


def parse_instrumentation(instr):
    """Split an instrumentation string and parse every entry.

    Args:
        instr: The raw instrumentation text; entries are separated by
            newlines or by commas outside parentheses.

    Returns:
        A list of dicts as produced by ``parse_instrument``, with nameless
        entries omitted.
    """
    entries = []
    for part in re.split(split_pattern, instr):
        entry = parse_instrument(part)
        if entry is not None:
            entries.append(entry)
    return entries


def main():
    """Rewrite every ``*.json`` file in the current directory in place.

    For each file that has a non-empty ``"imslp_info"`` entry, copy
    ``"First Publication."`` to ``"first_published"`` and convert
    ``"Instrumentation"`` into a structured ``"arrangement"`` list, then
    write the file back and print its name. Files without ``"imslp_info"``
    are left untouched.
    """
    for jsonfile in os.listdir("."):
        if not jsonfile.endswith(".json"):
            continue

        with open(jsonfile, "r", encoding="utf-8") as f:
            d = json.load(f)

        # Skip documents that are not objects or that carry no IMSLP data.
        imslp_info = d.get("imslp_info") if isinstance(d, dict) else None
        if not imslp_info:
            continue

        if "First Publication." in imslp_info:
            d["first_published"] = imslp_info["First Publication."]

        if "Instrumentation" in imslp_info:
            d["arrangement"] = parse_instrumentation(
                imslp_info["Instrumentation"]
            )

        with open(jsonfile, "w", encoding="utf-8") as f:
            json.dump(d, f)
        print(jsonfile)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment