Last active
November 20, 2016 01:33
-
-
Save lynnntropy/89c43f02f3657e7f4f0e to your computer and use it in GitHub Desktop.
LyricsReloaded MusixMatch Provider (place in %appdata%\MusicBee\mb_LyricsReloaded)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LyricsReloaded provider definition for Musixmatch.
#
# The `artist` and `title` variables below are normalised by their filter
# chains (applied top to bottom) and substituted into the URL under `config`;
# the lyrics are then extracted from the page with the `pattern` regex.
name: "Musixmatch"

variables:
    artist:
        type: artist
        filters:
        - strip_diacritics
        - lowercase
        # Special case: the artist "!!!" (Chk Chk Chk) is addressed by a fixed slug.
        - [replace, "!!!", "artist-46206"]
        # Special case: the artist "+/-", step 1 of 2. Swap in a placeholder here;
        # step 2 is the last filter of this chain.
        - [replace, "+/-", "p%m"]
        # Strip "f. " / "ft" / "feat..." (featuring) and everything after it.
        - [regex, '(?<=\W|\s)+(feat.+|ft[\W\s]+|(f\.\s)).+', ""]
        # URL-encode "'" (Musixmatch accepts encoded apostrophes in its URLs).
        - [regex, "'", "%27"]
        # Replace " & " with " and ", but ONLY when it is followed by "the".
        # NOTE: currently superfluous, because the plugin already replaces "&"
        # with "and" before these filters run. As a consequence the Musixmatch
        # lookup fails when the artist contains "&" NOT followed by "the".
        - [regex, '\s&\s(?=the)', " and "]
        # Replace medial non-alphanumeric character(s) with a single "-"
        # ("%" is excluded so the encoded "'" survives), e.g. M.I.A. > M-I-A.
        - [regex, '(?<=[a-z0-9%])[^\sa-z0-9%]+(?=[a-z0-9%]+)', "-"]
        # Strip non-word characters at the end of the string.
        - [regex, '\W+(?=$)', ""]
        # Collapse any remaining runs of non-alphanumeric characters into a single
        # space before strip_nonascii runs.
        # Written the long way because a YAML-friendly form of the non-capturing
        # group "(?:...)" could not be found. Simpler equivalent for reference:
        #   [regex, '\W+(?:\W)(?<!$)', " "]
        - [regex, '((?<=\s)([^a-z0-9\s-])+(\s|\W)+)|((?<=\w)([^a-z0-9-])+(\s|\W)+)', " "]
        - [strip_nonascii, -]
        # strip_nonascii turns the encoded apostrophe "%27" into "-27"; restore it.
        - [replace, "-27", "%27"]
        # Special case: the artist "+/-", step 2 of 2. After strip_nonascii the
        # placeholder reads "p-m"; map it to "-" when it starts the string.
        - [regex, '(?<=^)p-m', "-"]
    title:
        type: title
        # The artist filters are not reused here because "&" is treated differently:
        # Musixmatch ALWAYS strips "&" from the title, whereas in the artist it is
        # stripped *unless* it is followed by "the", in which case it becomes "and"
        # (see the "&" note in the artist filters).
        filters:
        - strip_diacritics
        - lowercase
        # Strip "f. " / "ft" / "feat..." (featuring) and everything after it.
        - [regex, '(?<=\W|\s)+(feat.+|ft[\W\s]+|(f\.\s)).+', ""]
        # URL-encode "'" (Musixmatch accepts encoded apostrophes in its URLs).
        - [regex, "'", "%27"]
        # Replace medial non-alphanumeric character(s) with a single "-"
        # ("%" is excluded so the encoded "'" survives), e.g. f**k > f-k.
        - [regex, '(?<=[a-z0-9%])[^\sa-z0-9%]+(?=[a-z0-9%]+)', "-"]
        # Strip non-word characters at the end of the string.
        - [regex, '\W+(?=$)', ""]
        # Collapse any remaining runs of non-alphanumeric characters into a single
        # space before strip_nonascii runs.
        - [regex, '((?<=\s)([^a-z0-9\s-])+(\s|\W)+)|((?<=\w)([^a-z0-9-])+(\s|\W)+)', " "]
        - [strip_nonascii, -]
        # strip_nonascii turns the encoded apostrophe "%27" into "-27"; restore it.
        - [replace, "-27", "%27"]

config:
    url: "http://www.musixmatch.com/lyrics/{artist}/{title}"
    # The named group "lyrics" captures the contents of the lyrics <span>;
    # the "s" option is passed through to the plugin's regex engine.
    pattern: ['<span\s+id="lyrics-html"[^>]*?>(?<lyrics>.*?)</span>', s]

post-filters:
- utf8_encode
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Shouldn't the file be musixmatch.com.yml (not yaml)?