Skip to content

Instantly share code, notes, and snippets.

@astoeckel
Created July 2, 2016 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save astoeckel/1aee40ea5c2995f655bbeba520b94a7c to your computer and use it in GitHub Desktop.
Save astoeckel/1aee40ea5c2995f655bbeba520b94a7c to your computer and use it in GitHub Desktop.
Small script to copy the song ratings between Rhythmbox databases
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import sys
import unicodedata
import difflib
# Exactly three arguments must follow the program name: the old database,
# the new database and the path the merged result is written to.
if len(sys.argv) != 4:
    print("Usage: copy_rating <OLD DB> <NEW DB> <TARGET>")
    raise SystemExit(1)
def build_id(root, tags_to_use):
    """Build a '/'-separated identifier string for an element.

    The identifier starts with the stripped text of `root` (or nothing if
    the element has no text) and is followed by the identifiers of every
    direct child whose tag is listed in `tags_to_use`, built recursively in
    document order. Empty parts are dropped, so no leading, trailing or
    doubled separators appear.

    :param root: element to describe.
    :param tags_to_use: collection of tag names that contribute to the id.
    :return: the identifier as a string (possibly empty).
    """
    own_text = root.text.strip() if isinstance(root.text, str) else ""
    parts = [own_text]
    parts.extend(
        build_id(node, tags_to_use)
        for node in root
        if node.tag in tags_to_use
    )
    # Joining only the non-empty parts inserts a separator exactly where
    # both the text gathered so far and the next part are non-empty.
    return '/'.join(part for part in parts if part)
def normalise(s):
    """Reduce a string to a lower-case, ASCII-only byte string.

    The text is NFKD-decomposed, encoded to ASCII while dropping every
    character that cannot be encoded, and then stripped of surrounding
    whitespace and lower-cased.

    :param s: the text to normalise.
    :return: the normalised text as a `bytes` object.
    """
    decomposed = unicodedata.normalize('NFKD', s)
    ascii_only = decomposed.encode('ASCII', 'ignore')
    return ascii_only.strip().lower()
def build_shingles(s, l=5):
    """Split the normalised form of `s` into overlapping pieces of length `l`.

    One piece is produced per start offset, so consecutive pieces overlap
    by `l - 1` characters. If the normalised text is shorter than `l`, a
    single piece containing all of it is returned; if it is empty, the
    result is an empty list.

    :param s: the text to split; it is passed through `normalise` first.
    :param l: length of each piece.
    :return: list of slices of the normalised text.
    """
    key = normalise(s)
    if len(key) == 0:
        return []
    # max() guarantees at least one window even when the key is shorter
    # than the requested piece length.
    window_count = max(l, len(key)) - l + 1
    return [key[start:start + l] for start in range(window_count)]
def has_child(root, tag_name):
    """Tell whether `root.iter(tag_name)` yields at least one element.

    :param root: element whose subtree is searched.
    :param tag_name: tag to look for.
    :return: True if a matching element is found, False otherwise.
    """
    # any() stops at the first element the iterator produces.
    return any(True for _ in root.iter(tag_name))
def build_index(index, root, elem_name, tags_to_copy, tags_to_use):
    """Fill `index` with a shingle -> elements lookup table.

    Walks the children of `root`. A child whose tag equals `elem_name` is
    indexed only if it contains at least one of `tags_to_copy`; it is then
    appended to the list stored under every shingle of its identifier.
    Children with any other tag are searched recursively.

    :param index: dict that is updated in place.
    :param root: element whose children are scanned.
    :param elem_name: tag of the elements to index.
    :param tags_to_copy: tags of which at least one must be present.
    :param tags_to_use: tags used to build each element's identifier.
    """
    for node in root:
        if node.tag != elem_name:
            # Not an element of interest itself; look inside it instead.
            build_index(index, node, elem_name, tags_to_copy, tags_to_use)
            continue

        # Elements carrying none of the wanted tags have nothing to offer.
        if not any(has_child(node, tag) for tag in tags_to_copy):
            continue

        for shingle in build_shingles(build_id(node, tags_to_use)):
            index.setdefault(shingle, []).append(node)
def copy_tags(index, root, elem_name, tags_to_copy, tags_to_use):
    """Copy missing tags onto elements from their best match in `index`.

    Walks the children of `root`. Children whose tag differs from
    `elem_name` are processed recursively. For every `elem_name` child that
    lacks at least one of `tags_to_copy`, the candidates sharing a shingle
    with it are fetched from `index`, the candidate whose identifier is most
    similar (difflib ratio) is chosen and, if the similarity exceeds 0.95,
    the first matching element for each missing tag is appended to the
    child. Every match and every copied tag is reported on stdout.

    :param index: shingle -> list of elements mapping, as filled by
        `build_index`.
    :param root: element whose children are updated in place.
    :param elem_name: tag of the elements that should receive tags.
    :param tags_to_copy: tags to copy over when they are missing.
    :param tags_to_use: tags used to build each element's identifier.
    """
    for child in root:
        if child.tag != elem_name:
            # Descend into container elements so nested entries are
            # handled as well.
            copy_tags(index, child, elem_name, tags_to_copy, tags_to_use)
            continue

        # Skip elements which already have all the tags that should be
        # copied to them
        if all(has_child(child, tag) for tag in tags_to_copy):
            continue

        # Build the id and shingles of this element
        new_id = build_id(child, tags_to_use)

        # Fetch corresponding old ids and write them to a map; keying by
        # id collapses elements reached through several shingles.
        old_elems = {}
        for shingle in build_shingles(new_id):
            for elem in index.get(shingle, ()):
                old_elems[build_id(elem, tags_to_use)] = elem

        # For each old entry calculate the similarity to the new entry,
        # remember the best one
        best_sim = 0.0
        best_elem = None
        best_id = ""
        for old_id, old_elem in old_elems.items():
            sim = difflib.SequenceMatcher(a=new_id, b=old_id).ratio()
            if sim > best_sim:
                best_sim = sim
                best_elem = old_elem
                best_id = old_id

        # Only near-identical identifiers count as the same entry.
        if best_sim <= 0.95:
            continue

        # Copy the given tags to the current element from the best matching
        print(new_id, " --> ", best_id)
        for tag_to_copy in tags_to_copy:
            if has_child(child, tag_to_copy):
                continue
            src = next(best_elem.iter(tag_to_copy), None)
            if src is not None:
                child.append(src)
                print(tag_to_copy, ": ", src.text)
# Tags whose text identifies an entry, and the tags carried over from the
# old database to the new one.
tags_to_use = ["title", "artist", "album", "duration"]
tags_to_copy = ["rating"]

print("Loading old and new database...")
old_db_tree = ET.parse(sys.argv[1])
new_db_tree = ET.parse(sys.argv[2])
old_db, new_db = old_db_tree.getroot(), new_db_tree.getroot()

# Index the old entries by the shingles of their identifiers.
print("Building index...")
index = {}
build_index(index, old_db, "entry", tags_to_copy, tags_to_use)

# Update the new tree in place using the index built above.
print("Copying ratings from the old to the new file...")
copy_tags(index, new_db, "entry", tags_to_copy, tags_to_use)

# The result goes to the third argument; the input files are not rewritten
# unless the same path is given.
print("Writing updated database to file...")
new_db_tree.write(sys.argv[3], encoding="UTF-8")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment