Skip to content

Instantly share code, notes, and snippets.

@benosteen
Created January 21, 2015 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benosteen/822fef58c6c92b120fff to your computer and use it in GitHub Desktop.
Save benosteen/822fef58c6c92b120fff to your computer and use it in GitHub Desktop.
Pulling metadata together from id3 tag XML files
import os
from xml.etree import ElementTree as ET
import csv
# Destination CSV file that the script writes its rows to.
OUTPUT = "/path/to/output.csv"
# Root of the directory tree that is walked looking for XML files.
PATH = "/path/to/root/folder/that/has/all/the/xmls"
# CSV columns, in output order. Every name is listed exactly once:
# csv.DictWriter writes one column per entry, so a repeated name would
# show up as a duplicated column in the output file.
HEADERS = [
    'ALBUM',
    'TITLE',
    'ARTIST',
    'GENRE',
    'TRACKNUMBER',
    'COMMENTS',
    'YEAR',
    'id3v2',
    'PATH',
    'FROM_FILENAME',
]
# Assuming UTF-8...
ENC = "utf-8"
def to_row(path, filename, doc):
    """Flatten one parsed tag document into a dict usable as a CSV row.

    Args:
        path: Directory the XML file was found in; stored under 'PATH'.
        filename: Name of the XML file; stored under 'FROM_FILENAME'.
        doc: Parsed XML element whose direct ``tag`` children carry the
            metadata in their ``name`` and ``value`` attributes.

    Returns:
        A dict mapping each tag's ``name`` attribute to its ``value``
        attribute, plus the 'PATH' and 'FROM_FILENAME' entries.

    Every key of the resulting row that is not listed in HEADERS is
    printed, so unexpected tag names are visible on the console.
    """
    row = {tag.get('name'): tag.get('value') for tag in doc.findall("tag")}
    row['PATH'] = path
    row['FROM_FILENAME'] = filename
    for key in row:
        if key not in HEADERS:
            # Function-call form: valid on both Python 2 and Python 3.
            print(key)
    return row
# Walk PATH and append one CSV row to OUTPUT for every XML file that can be
# parsed and converted by to_row().
with open(OUTPUT, "w") as o:
    spool = csv.DictWriter(o, fieldnames=HEADERS)
    spool.writeheader()
    # The directory list from os.walk is not needed; the underscore name also
    # avoids shadowing the builtin ``dir``.
    for root, _dirs, files in os.walk(PATH):
        print("{0} --> {1} files".format(root, len(files)))
        for filename in files:
            if not filename.endswith("xml"):
                continue
            xml_path = os.path.join(root, filename)
            try:
                # Hand the parser raw bytes and have it decode them as ENC,
                # instead of decoding to text before parsing.
                with open(xml_path, "rb") as src:
                    doc = ET.fromstring(
                        src.read(), parser=ET.XMLParser(encoding=ENC)
                    )
                spool.writerow(to_row(root, filename, doc))
            except Exception as e:
                # Best effort: one unreadable or malformed file (or a row
                # that DictWriter rejects because it has keys outside
                # HEADERS) must not abort the whole run. Report which file
                # failed and carry on with the next one.
                print("{0}: {1}".format(xml_path, e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment