Last active
December 7, 2016 02:58
-
-
Save marckean/fc2c714259ec3e5f296948ef766dd3dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Specify here the directory of the music you want to update on a song-by-song basis.
# Only files with the .mp3 extension whose Directory matches the pattern
# 'AnyDirectoryName' are selected; replace the pattern with the folder(s) to process.
$mp3s = Get-ChildItem -Path "C:\Music" -Recurse | ? {$_.Extension -eq '.mp3' -and $_.Directory -match 'AnyDirectoryName'}

# Assembly.LoadFile only accepts an absolute path, so the relative location of the
# TagLib# assembly is resolved (against the current location) before it is loaded.
$taglib = (Resolve-Path -Path "..\taglib-sharp.dll").ProviderPath
[system.reflection.assembly]::loadfile($taglib)
Function CheckSong ($song, $against){
    <#
        .SYNOPSIS
            Score a list of candidate titles against a song name.
        .DESCRIPTION
            Two passes are made over the candidates in $against:
              1. every candidate is scored by the length of its longest common substring
                 with $song (text in parentheses inside the candidate is ignored);
              2. the 50 best candidates of pass 1 are re-scored with Get-FuzzyMatchScore.
            If the best pass-2 score is below 1600, $song is split on ' - ' into an
            artist part and a title part and the pass-2 candidates are searched for one
            whose own ' - ' parts match both; the last such candidate is returned.
            Otherwise (or when no candidate matches) all pass-2 results are returned.
        .PARAMETER song
            The song name to look for.
        .PARAMETER against
            The candidate strings to compare with.
        .OUTPUTS
            Objects with the properties DistanceNumber (the score) and Song (the candidate).
            Nothing is returned when there are no candidates.
    #>
    $Results = @()

    # Pass 1: cheap ranking by longest-common-substring length.
    # Collecting the loop output avoids re-allocating an array on every append.
    $1stResults = @(
        foreach ($candidate in $against) {
            [PSCustomObject]@{
                DistanceNumber = (Get-LongestCommonSubstring $song ($candidate -replace '\(.*\)','')).Length
                Song           = $candidate
            }
        }
    )

    # Nothing to rank: return nothing instead of indexing into an empty result set.
    if ($1stResults.Count -eq 0) { return }

    # Pass 2: full fuzzy score, restricted to the 50 most promising candidates.
    $shortlist = $1stResults | Sort-Object DistanceNumber | Select-Object -Last 50
    $2ndResults = @(
        foreach ($entry in $shortlist) {
            [PSCustomObject]@{
                DistanceNumber = (Get-FuzzyMatchScore $song ($entry.Song -replace '\(.*\)',''))
                Song           = $entry.Song
            }
        }
    )

    $bestScore = ($2ndResults | Sort-Object -Descending DistanceNumber | Select-Object -First 1).DistanceNumber
    if ($bestScore -lt 1600) {
        $songParts = $song -split ' - '
        $title = $songParts[1]

        # Only refine when an artist token was actually found; reading $Matches after
        # a failed -match would pick up the result of some earlier, unrelated match.
        if ($songParts[0] -match '[0-9A-Za-z\x2d]+') {
            # The artist token consists of letters, digits and '-' only, so it is safe
            # to use as a pattern. The title is arbitrary text and must be escaped so
            # characters such as '(' or '?' are matched literally. A missing title
            # keeps the empty pattern, which matches anything.
            $artist = $Matches[0]
            if ($title) { $titlePattern = [regex]::Escape($title) } else { $titlePattern = '' }

            foreach ($2ndresult in $2ndResults) {
                $candidateParts = $2ndresult.Song -split ' - '
                if ($candidateParts[0] -match $artist -and $candidateParts[1] -match $titlePattern) {
                    $Results = $2ndresult
                }
            }
        }
    }

    # Fall back to the complete pass-2 list when no single candidate was picked.
    if (-not $Results) { $Results = $2ndResults }
    $Results
}
Function Get-DamerauLevenshteinDistance {
    <#
        .SYNOPSIS
            Get the Damerau-Levenshtein distance between two strings.
        .DESCRIPTION
            Counts the minimum number of single-character insertions, deletions and
            substitutions, plus transpositions of two adjacent characters, needed to
            turn String1 into String2. Characters are compared with -eq.
        .PARAMETER String1
            The first string. $null is treated as an empty string.
        .PARAMETER String2
            The second string. $null is treated as an empty string.
        .EXAMPLE
            Get-DamerauLevenshteinDistance 'ca' 'ac'
            Returns 1 (one transposition).
        .OUTPUTS
            System.Int32
    #>
    param ($String1,$String2)

    if ($null -eq $String1) { $String1 = '' }
    if ($null -eq $String2) { $String2 = '' }
    $Length1 = $String1.Length
    $Length2 = $String2.Length

    # matrix[i,j] = distance between the first i characters of String1 and the
    # first j characters of String2. Kept local so no state leaks to the caller.
    $matrix = New-Object 'int[,]' ($Length1 + 1), ($Length2 + 1)

    # Row and column 0: distance to the empty prefix. The bounds are inclusive;
    # stopping one short would leave the last character of each string out.
    for ($i = 0; $i -le $Length1; $i++) { $matrix[$i,0] = $i }
    for ($j = 0; $j -le $Length2; $j++) { $matrix[0,$j] = $j }

    for ($i = 1; $i -le $Length1; $i++) {
        for ($j = 1; $j -le $Length2; $j++) {
            if ($String1[$i - 1] -eq $String2[$j - 1]) { $cost = 0 } else { $cost = 1 }

            $deletion     = $matrix[($i - 1),$j] + 1
            $insertion    = $matrix[$i,($j - 1)] + 1
            $substitution = $matrix[($i - 1),($j - 1)] + $cost
            $best = [Math]::Min([Math]::Min($deletion, $insertion), $substitution)

            # Transposition of two adjacent characters.
            if (($i -gt 1) -and ($j -gt 1) -and
                ($String1[$i - 1] -eq $String2[$j - 2]) -and
                ($String1[$i - 2] -eq $String2[$j - 1])) {
                $transposition = $matrix[($i - 2),($j - 2)] + $cost
                $best = [Math]::Min($best, $transposition)
            }

            $matrix[$i,$j] = $best
        }
    }

    # The answer is the cell that covers both complete strings.
    $matrix[$Length1,$Length2]
}
function Get-FuzzyMatchScore {
    <#
        .SYNOPSIS
            Calculate a score for how well a search term matches a string.
        .DESCRIPTION
            Starts from 100 and combines several approximate-matching measures:
              score = (100 - Levenshtein distance) * longest-common-substring length
                      - length of the shortest in-order match of the search characters
                      - start index of that match
                      + length of the common prefix
            A higher score means a better match.
        .PARAMETER Search
            The search term.
        .PARAMETER String
            The string the search term is scored against.
        .OUTPUTS
            The numeric score.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Position = 0)]
        [string] $Search,

        [Parameter(Position = 1)]
        [string] $String
    )

    $score = 100

    # Use approximate string matching to get some values needed to calculate the score of the result
    $longestCommonSubstring = Get-LongestCommonSubstring -String1 $String -String2 $Search
    $levenshteinDistance = Get-LevenshteinDistance -String1 $String -String2 $Search

    $commonPrefix = $null
    $matchLength = 0
    $matchIndex = 0

    # Get-CommonPrefix declares both strings as mandatory and non-empty, and an
    # empty search term would produce an empty pattern, so both steps only run
    # when there is something to compare.
    if ($Search -and $String) {
        $commonPrefix = Get-CommonPrefix -String1 $String -String2 $Search

        # The pattern finds the characters of the search term in order, with anything
        # in between. Each character is escaped so that characters such as '(' , '.'
        # or '?' in the search term are matched literally instead of being interpreted
        # as regex syntax. The shorter the match and the earlier it starts, the less
        # is subtracted from the score.
        $regexMatchFilter = @($Search.ToCharArray() | ForEach-Object { [regex]::Escape([string]$_) }) -join '.*?'
        $match = Select-String -InputObject $String -Pattern $regexMatchFilter -AllMatches
        $shortestMatch = $match.Matches | Sort-Object Length | Select-Object -First 1
        if ($shortestMatch) {
            $matchLength = $shortestMatch.Value.Length
            $matchIndex = $shortestMatch.Index
        }
    }

    # Calculate score
    $score = $score - $levenshteinDistance
    $score = $score * $longestCommonSubstring.Length
    $score = $score - $matchLength
    $score = $score - $matchIndex

    if ($commonPrefix) {
        $score = $score + $commonPrefix.Length
    }

    Write-Output $score
}
function Get-HammingDistance {
    <#
        .SYNOPSIS
            Get the Hamming distance between two strings or two positive integers.
        .DESCRIPTION
            For two strings, the Hamming distance is the number of positions at which
            the characters differ, i.e. the minimum number of substitutions needed to
            change one string into the other. The classic definition requires strings
            of equal length; this function also accepts strings of unequal length and
            counts every position beyond the end of the shorter string as a difference.

            For two positive integers (treated as binary values) the distance is the
            number of bits that differ between them.
        .EXAMPLE
            Get-HammingDistance 'karolin' 'kathrin'
            Calculate the Hamming distance between the two strings. The result is 3.
        .EXAMPLE
            Get-HammingDistance 'karolin' 'kathrin' -NormalizeOutput
            Calculate the normalized Hamming distance between the two strings. The result is 0.571428571428571.
        .EXAMPLE
            Get-HammingDistance -Int1 61 -Int2 15
            Calculate the Hamming distance between 61 and 15. The result is 3.
        .LINK
            http://en.wikipedia.org/wiki/Hamming_distance
            https://communary.wordpress.com/
            https://github.com/gravejester/Communary.PASM
        .NOTES
            Author: Øyvind Kallstad
            Date: 03.11.2014
            Version: 1.0
    #>
    [CmdletBinding(DefaultParameterSetName = 'String')]
    param (
        [Parameter(Position = 0, Mandatory = $true, ParameterSetName = 'String')]
        [ValidateNotNullOrEmpty()]
        [string] $String1,

        [Parameter(Position = 1, Mandatory = $true, ParameterSetName = 'String')]
        [ValidateNotNullOrEmpty()]
        [string] $String2,

        [Parameter(Position = 0, Mandatory = $true, ParameterSetName = 'Integer')]
        [ValidateNotNullOrEmpty()]
        [uint32] $Int1,

        [Parameter(Position = 1, Mandatory = $true, ParameterSetName = 'Integer')]
        [ValidateNotNullOrEmpty()]
        [uint32] $Int2,

        # Compare the strings case-sensitively. The default comparison ignores case.
        [Parameter(ParameterSetName = 'String')]
        [switch] $CaseSensitive,

        # Return a value between 0 and 1 instead of a count, where 1 indicates a 100% match.
        # Without this switch the result ranges from 0 (100% match) up to the length of the longest string.
        [Parameter(ParameterSetName = 'String')]
        [switch] $NormalizeOutput
    )

    try {
        if ($PSCmdlet.ParameterSetName -ne 'String') {
            # Integer mode: XOR leaves a 1 for every differing bit; each pass of the
            # loop clears the lowest set bit, so the pass count is the bit count.
            $differingBits = 0
            for ($remaining = $Int1 -bxor $Int2; $remaining -ne 0; $remaining = $remaining -band ($remaining - 1)) {
                $differingBits++
            }
            Write-Output $differingBits
            return
        }

        # String mode: fold both inputs to lower case unless case matters
        if (-not($CaseSensitive)) {
            $String1 = $String1.ToLowerInvariant()
            $String2 = $String2.ToLowerInvariant()
        }

        $longest = [Math]::Max($String1.Length,$String2.Length)
        $shortest = [Math]::Min($String1.Length,$String2.Length)

        # count the mismatches within the overlapping part of the two strings
        $mismatches = 0
        $position = 0
        while ($position -lt $shortest) {
            if ($String1[$position] -cne $String2[$position]) {
                $mismatches++
            }
            $position++
        }

        # every character past the end of the shorter string counts as a mismatch
        $mismatches = $mismatches + ($longest - $shortest)

        if (-not($NormalizeOutput)) {
            Write-Output $mismatches
        }
        else {
            Write-Output (1 - ($mismatches / $longest))
        }
    }
    catch {
        Write-Warning $_.Exception.Message
    }
}
function Get-LevenshteinDistance {
    <#
        .SYNOPSIS
            Get the Levenshtein distance between two strings.
        .DESCRIPTION
            The Levenshtein Distance is a way of quantifying how dissimilar two strings (e.g., words) are to one another by counting the minimum number of operations required to transform one string into the other.
        .EXAMPLE
            Get-LevenshteinDistance 'kitten' 'sitting'
            The result is 3.
        .EXAMPLE
            Get-LevenshteinDistance 'kitten' 'kitten' -NormalizeOutput
            The result is 1 (a perfect match).
        .LINK
            http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23
            http://en.wikipedia.org/wiki/Edit_distance
            https://communary.wordpress.com/
            https://github.com/gravejester/Communary.PASM
        .NOTES
            Author: Øyvind Kallstad
            Date: 07.11.2014
            Version: 1.0
    #>
    [CmdletBinding()]
    param(
        [Parameter(Position = 0)]
        [string]$String1,

        [Parameter(Position = 1)]
        [string]$String2,

        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter()]
        [switch] $CaseSensitive,

        # A normalized output is 1 - (distance / length of the longest string):
        # 1 is a perfect match and 0 means no character could be kept.
        [Parameter()]
        [switch] $NormalizeOutput
    )

    if (-not($CaseSensitive)) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    # Read the dimensions once instead of querying the array bounds on every loop test.
    $rows = $String1.Length
    $columns = $String2.Length

    # $d[i,j] = distance between the first i characters of String1 and the first j of String2
    $d = New-Object 'Int[,]' ($rows + 1), ($columns + 1)

    try {
        for ($i = 0; $i -le $rows; $i++) {
            $d[$i,0] = $i
        }

        for ($j = 0; $j -le $columns; $j++) {
            $d[0,$j] = $j
        }

        for ($i = 1; $i -le $rows; $i++) {
            for ($j = 1; $j -le $columns; $j++) {
                # substitution is free when the characters are identical
                if ($String1[$i-1] -ceq $String2[$j-1]) { $cost = 0 } else { $cost = 1 }
                $min1 = $d[($i-1),$j] + 1
                $min2 = $d[$i,($j-1)] + 1
                $min3 = $d[($i-1),($j-1)] + $cost
                $d[$i,$j] = [Math]::Min([Math]::Min($min1,$min2),$min3)
            }
        }

        $distance = $d[$rows,$columns]

        if ($NormalizeOutput) {
            $longest = [Math]::Max($rows,$columns)
            if ($longest -eq 0) {
                # Two empty strings are identical; dividing by their length (0) would fail.
                Write-Output 1
            }
            else {
                Write-Output (1 - ($distance) / ($longest))
            }
        }

        else {
            Write-Output $distance
        }
    }

    catch {
        Write-Warning $_.Exception.Message
    }
}
function Get-LongestCommonSubstring {
    <#
        .SYNOPSIS
            Get the longest common substring of two strings.
        .DESCRIPTION
            Returns the longest run of consecutive characters that occurs in both
            strings. When several runs share the maximum length, the one found first
            while scanning String1 from the start is returned. The comparison ignores
            case unless -CaseSensitive is given; in the case-insensitive mode the
            result is returned in lower case.
        .EXAMPLE
            Get-LongestCommonSubstring 'Karolin' 'kathrin' -CaseSensitive
            The result is 'in'.
        .LINK
            https://fuzzystring.codeplex.com/
            http://en.wikipedia.org/wiki/Longest_common_substring_problem
            https://communary.wordpress.com/
            https://github.com/gravejester/Communary.PASM
        .NOTES
            Adapted to PowerShell from code by Kevin Jones (https://fuzzystring.codeplex.com/)
            Author: Øyvind Kallstad
            Date: 03.11.2014
            Version: 1.0
    #>
    [CmdletBinding()]
    param (
        [Parameter(Position = 0)]
        [string] $String1,

        [Parameter(Position = 1)]
        [string] $String2,

        [Parameter()]
        [switch] $CaseSensitive
    )

    if (-not($CaseSensitive)) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    $length1 = $String1.Length
    $length2 = $String2.Length

    # $runLengths[r,c] is the length of the common run that ends at character r of
    # String1 and character c of String2 (both counted from 1). Row 0 and column 0
    # stay zero, so the first characters need no special handling.
    $runLengths = New-Object 'int[,]' ($length1 + 1), ($length2 + 1)

    $bestLength = 0
    $bestStart = 0

    for ($r = 1; $r -le $length1; $r++) {
        for ($c = 1; $c -le $length2; $c++) {
            if ($String1[$r - 1] -ceq $String2[$c - 1]) {
                # extend the run that ended on the previous character pair
                $run = $runLengths[($r - 1),($c - 1)] + 1
                $runLengths[$r,$c] = $run

                # only a strictly longer run replaces the current best
                if ($run -gt $bestLength) {
                    $bestLength = $run
                    $bestStart = $r - $run
                }
            }
        }
    }

    Write-Output ($String1.Substring($bestStart, $bestLength))
}
function Get-CommonPrefix {
    <#
        .SYNOPSIS
            Find the common prefix of two strings.
        .DESCRIPTION
            Returns the characters that both strings share, counted from the first
            character up to the first position where they differ. The comparison
            ignores case unless -CaseSensitive is given; in the case-insensitive mode
            the prefix is returned in lower case.
        .EXAMPLE
            Get-CommonPrefix 'Card' 'Cartoon'
            Will get the common prefix of both strings. Should output 'car'.
        .EXAMPLE
            Get-CommonPrefix 'Card' 'Cartoon' -MaxPrefixLength 2
            Limits the prefix to two characters. Should output 'ca'.
        .LINK
            https://communary.wordpress.com/
            https://github.com/gravejester/Communary.PASM
        .INPUTS
            System.String
        .OUTPUTS
            System.String
        .NOTES
            Author: Øyvind Kallstad
            Date: 03.11.2014
            Version 1.1
            Dependencies: none
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [ValidateNotNullOrEmpty()]
        [string]$String1,

        [Parameter(Mandatory = $true, Position = 1)]
        [ValidateNotNullOrEmpty()]
        [string]$String2,

        # Maximum length of the returned prefix.
        [Parameter()]
        [int]$MaxPrefixLength,

        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter()]
        [switch] $CaseSensitive
    )

    if (-not($CaseSensitive)) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    # The prefix can never be longer than the shorter input. A MaxPrefixLength
    # that is set (non-zero) and does not exceed that length takes its place.
    $limit = [Math]::Min($String1.Length,$String2.Length)
    if (($MaxPrefixLength -ne 0) -and ($limit -ge $MaxPrefixLength)) {
        $limit = $MaxPrefixLength
    }

    # Advance while the characters at the same position are identical
    $matched = 0
    while (($matched -lt $limit) -and ($String1[$matched] -ceq $String2[$matched])) {
        $matched++
    }

    Write-Output ($String1.Substring(0, $matched))
}
# Main loop: for every MP3 whose full path contains ' - ', look up the label on
# iTunes and the composer / year (and, if still missing, the label) on
# australian-charts, then write whatever was found into the file's tags.
# 'select -Skip 0' is kept so the start position can be changed by hand
# (presumably to resume an interrupted run).
foreach($mp3 in ($mp3s | ? {$_.FullName -match ' - '} | select -Skip 0)){
    $song = $mp3.BaseName
    Write-Host $song
    #$song = "Billy Idol - Flesh For Fanasy"
    #$song = "All Saints - Never ever"
    #$song = 'Armand Van Helden - My My My'

    # Query-string forms of the song name: parentheses removed, ' - ' and other
    # whitespace turned into '+'. The charts search only uses the first 50 characters.
    $iTunesSearchSong = $song -replace '\(' -replace '\)' -replace ' - ', ' ' -replace '\s','+'
    $SearchSong = $song[0..49] -join "" -replace '\(' -replace '\)' -replace ' - ', ' ' -replace '\s','+' -replace "'", "%27"
    $MatchSong = $song -replace ' - ', '&titel='

    # Reset everything that is filled in per track, so a failed lookup can never
    # reuse the values or web responses of the previous track.
    $Composer = $null;$Year = $null;$Label = $null
    $iTunesResults = $null;$x = $null;$y = $null;$Element = $null

    $media = [TagLib.File]::Create($mp3.FullName)

    #Check iTunes for music Label information
    try
    {
        $uri = "https://itunes.apple.com/search?term=$iTunesSearchSong&country=au&entity=song"
        $x = Invoke-WebRequest -Uri $uri
        $iTunesResults = ($x.Content | ConvertFrom-Json).results
        if($iTunesResults)
        {
            $y = Invoke-WebRequest -Uri $iTunesResults[0].trackViewUrl
            $copyrightElement = $y.ParsedHtml.getElementsByTagName('li') | ? {$_.ClassName -eq 'copyright'} | select -First 1
            $iTunesSongCopyright = $copyrightElement.innerText -replace '℗ '
            # Only read $Matches when this match succeeded; otherwise it still holds
            # the result of an earlier, unrelated match.
            if($iTunesSongCopyright -match '(?<!\d)[a-z|A-Z].*'){$Label = $Matches[0]}
        }
    }
    catch
    {
        Write-Warning "iTunes lookup failed for '$song': $($_.Exception.Message)"
    }

    #Then check australian-charts for Composer & Year information
    try
    {
        $domain = 'http://staff.australian-charts.com/'
        $uri = $domain + "search.asp?cat=s&search=$SearchSong"
        $x = Invoke-WebRequest -Uri $uri

        # Check all possible results - generally starts at 462
        $ResultCells = $x.AllElements | select -Skip 400 -First 122 | ? {$_.class -eq 'text' -and $_.tagname -eq 'td'}
        $PossibleResults = $ResultCells | ? {$_.outerText -match '[a-z|A-Z]{1,}'}

        if($PossibleResults)
        {
            # Match the search results
            $MatchedResults = CheckSong $MatchSong ($PossibleResults.innerHTML | sort -Descending)
            # Find the best search result
            $BestResult = ($MatchedResults | sort DistanceNumber -Descending | select -First 1).song
            # Best Element (a single element, so that -match below fills $Matches)
            $BestElement = $ResultCells | ? {$_.innerHTML -eq $BestResult} | select -First 1

            # Find the correct URL
            if($BestElement -and ($BestElement.outerHTML -match 'A.href="(.*)"'))
            {
                # The link is taken from raw HTML, so HTML entities in it (such as the
                # encoded ampersand and double quote) are decoded before it is requested.
                $resultURL = [System.Net.WebUtility]::HtmlDecode($domain + $Matches[1])
                $y = Invoke-WebRequest -Uri $resultURL
                $Element = ($y.AllElements | ? {$_.tagName -eq 'HTML'})
                $pageText = $Element.innerText

                if($pageText -match 'Music\/Lyrics:(.*)')
                {
                    # The composer text runs from just after "Lyrics:" to 3 characters
                    # before "Producer:"; when that heading is not usable, "World wide:"
                    # marks the end instead.
                    $startpos = $pageText.IndexOf("Lyrics:") + 7
                    $producerpos = $pageText.IndexOf("Producer:")
                    if($producerpos -ge 3)
                    {
                        $endpos = $producerpos - 3
                    }
                    else
                    {
                        $endpos = $pageText.IndexOf("World wide:") - 3
                    }

                    if($endpos -gt $startpos)
                    {
                        $Composer = $pageText.Substring($startpos,($endpos - $startpos))
                        $Composer = $Composer -replace '\n', ', '
                    }
                    if($Composer){$media.tag.Composers = $Composer;Write-Host $Composer}
                }

                if($pageText -match 'Year:(.*)')
                {
                    $Year = $Matches[1]
                    if($Year){$media.tag.year = $Year;Write-Host $Year}
                }

                # Use the label from this site only when iTunes did not provide one.
                if((-not $Label) -and ($pageText -match 'Label:(.*)'))
                {
                    $Label = $Matches[1]
                }
            }
        }
    }
    catch
    {
        Write-Warning "australian-charts lookup failed for '$song': $($_.Exception.Message)"
    }

    if($Label){$media.tag.Publisher = $Label;Write-Host "$Label`n"}
    $media.Save()

    # Pause between tracks before issuing the next web requests.
    Start-Sleep -Seconds 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment