Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
import csv, os
from collections import Counter
# Build billboard.csv, a per-song table of structural statistics, from the
# salami_chords.txt annotation files of the dataset.
#
# Expected layout (run the script from the directory that holds "dataset"):
#   +- hoge
#      +- dataset
#      |  +- <hundreds of four-digit folders: 0001, 0002, ...>
#      |     +- salami_chords.txt
#      +- McGill_CSVgen.py

# Column names written as the first row of the generated CSV.
HEADER = ["id", "title", "artist", "running time", "intro length", "total number of elements", "formal elements", "song elements", "movement", "repetition", "key changes", "metric changes", "title words count"]

# Name of the annotation file looked up inside every song folder.
CHORDS_FILE = "salami_chords.txt"


def parse_chords(lines):
    """Collect the raw facts of one annotation file.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        A tuple ``(metadata, timestamps, letters, labels, tonic_changes,
        metre_changes)`` where

        * ``metadata`` holds the values of the "#" header lines other than
          tonic/metre, in file order (the rest of the script treats the
          first two as title and artist);
        * ``timestamps`` holds the start time of every labelled section,
          followed by the time of the "end" marker when there is one;
        * ``letters`` / ``labels`` hold the second and third
          whitespace-separated tokens of every labelled section line, with
          commas stripped;
        * ``tonic_changes`` / ``metre_changes`` are the number of tonic /
          metre header lines minus one.

    Raises:
        ValueError: if a section or end line does not start with a number.
        IndexError: if a labelled section line has fewer than three tokens.
    """
    metadata = []
    timestamps = []
    letters = []
    labels = []
    # Start at -1 so that the first declaration is not counted as a change.
    tonic_changes = -1
    metre_changes = -1

    for line in lines:
        if line.startswith("#"):
            # Split on the first colon only, so values that themselves
            # contain a colon are kept whole.
            key, colon, value = line[1:].partition(":")
            key = key.strip()
            # Compare the key, not the whole line, so a title that merely
            # contains "tonic" or "metre" is not mistaken for a change.
            if key == "tonic":
                tonic_changes += 1
            elif key == "metre":
                metre_changes += 1
            elif colon:
                metadata.append(value.strip())
            # Header lines never carry timings; skipping the checks below
            # avoids parsing "#" as a timestamp (e.g. a title ending in "end").
            continue

        fields = line.split()
        # A section starts on a bar line whose text before the first "|"
        # contains a comma (i.e. it carries labels, not just chords).
        if "|" in line and "," in line.split("|")[0]:
            timestamps.append(float(fields[0]))
            letters.append(fields[1].strip(","))
            labels.append(fields[2].strip(","))
        # End-of-song marker: "<time> end". Token based, so any whitespace
        # separator and a missing final newline are both accepted.
        if len(fields) > 1 and fields[-1] == "end":
            timestamps.append(float(fields[0]))

    return metadata, timestamps, letters, labels, tonic_changes, metre_changes


def build_row(song_id, lines):
    """Return the CSV row for one song.

    Args:
        song_id: 1-based song number, stored in the first column.
        lines: iterable of the lines of the song's annotation file.

    Returns:
        ``[song_id, *metadata, running time, intro length, total elements,
        formal elements, song elements, movement, repetition, key changes,
        metric changes, title word count]``. When the file has fewer than
        two timestamps, no labelled section, or a zero running time, the
        statistics cannot be computed and only ``[song_id, *metadata]`` is
        returned.
    """
    (metadata, timestamps, letters, labels,
     tonic_changes, metre_changes) = parse_chords(lines)
    row = [song_id] + metadata

    if len(timestamps) < 2 or not labels:
        return row
    running_time = timestamps[-1] - timestamps[0]
    if running_time == 0:
        # Every ratio below would divide by zero.
        return row

    intro_length = timestamps[1] - timestamps[0]
    total_elements = len(timestamps)
    formal_elements = len(Counter(letters))
    song_elements = len(Counter(labels))
    movement = formal_elements / total_elements
    # Repetition level: running time divided by the share of it that is
    # attributed to distinct song elements.
    unique_time = (song_elements / total_elements) * running_time
    repetition = running_time / unique_time
    title = metadata[0] if metadata else ""

    row.extend([
        running_time,
        intro_length,
        total_elements,
        formal_elements,
        song_elements,
        movement,
        repetition,
        tonic_changes,
        metre_changes,
        len(title.split()),
    ])
    return row


def main(dataset_dir="./dataset", song_count=1300, output_name="billboard.csv"):
    """Scan the dataset and write the summary CSV.

    Args:
        dataset_dir: directory holding the four-digit song folders.
        song_count: highest song number to look for (folders 0001..N).
        output_name: file name of the CSV, created inside ``dataset_dir``.

    Returns:
        The sheet that was written: the header row followed by one row per
        song number. Songs whose annotation file is missing get a row that
        contains only their id.
    """
    sheet = [list(HEADER)]

    for song_id in range(1, song_count + 1):
        # Paths are joined instead of os.chdir()-ing into every folder, so a
        # failure half-way cannot leave the working directory changed.
        chords_path = os.path.join(dataset_dir, "{0:04d}".format(song_id), CHORDS_FILE)
        if not os.path.isfile(chords_path):
            sheet.append([song_id])
            continue
        # "with" closes each annotation file as soon as it has been parsed.
        with open(chords_path, "r") as chords:
            sheet.append(build_row(song_id, chords))

    # check if it works or not.
    print(sheet)

    # 'sheet' is now the whole spreadsheet; save it as CSV for later use.
    with open(os.path.join(dataset_dir, output_name), "w") as out:
        csv.writer(out, lineterminator="\n").writerows(sheet)

    # well, have you finished these tasks?
    print("Finished!")
    return sheet


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment