Instantly share code, notes, and snippets.
Last active
July 10, 2024 14:53
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save uyriq/338013336653b2b7c676a477046fc75e to your computer and use it in GitHub Desktop.
Fix Windows-1252/1251 to UTF-8 encoding for ID3 tags
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# .description: "This script reads the ID3 tags of MP3 files in a directory, corrects the encoding from WINDOWS-1251 (incorrectly displayed as WINDOWS-1252) to UTF-8, and saves the corrected tags back to the files." | |
# .prerequisites: "To run this script, you must first obtain TagLibSharp.dll. This can be done by downloading it from https://nuget.org/packages/TagLibSharp/2.3.0 or by compiling the sources available at https://github.com/mono/taglib-sharp." | |
# .how_to_use: "Execute this script in the directory containing the MP3 files you wish to correct. Ensure TagLibSharp.dll is accessible to the script, adjusting the library loading path as necessary." | |
# You can optionally pass -AllArtist, -AllAlbum, -AllPicture (path to an image file) or -AllComment to set the corresponding tag to the same value on every file
param(
    # Optional overrides applied to every processed file; each one defaults to no value
    [string]$AllArtist = $null,
    [string]$AllAlbum = $null,
    [string]$AllPicture = $null,
    [string]$AllComment = $null
)

# TagLib# is loaded from a path relative to the current location
Add-Type -Path ".\TagLibSharp.dll"

# Anything that is not PowerShell Core (version 6 and above) first loads the code page
# provider from a local binary package, obtainable from
# https://nuget.info/packages/System.Text.Encoding.CodePages/ (netstandard 2.0)
if ($PSVersionTable.PSEdition -ne 'Core') {
    Add-Type -Path ".\System.Text.Encoding.CodePages.dll"
}

# Register the code page provider to ensure Windows-1251 is available
[System.Text.Encoding]::RegisterProvider([System.Text.CodePagesEncodingProvider]::Instance)
function Test-NeedsEncodingCorrection {
    <#
    .SYNOPSIS
    Decides whether a tag value looks like Windows-1251 text that was decoded as Windows-1252.

    .DESCRIPTION
    Returns $true only when BOTH conditions hold:
      * the text contains at least one non-ASCII character, and
      * every non-ASCII character is one that Windows-1252 can encode
        (U+0080-U+00FF plus the characters Windows-1252 maps to bytes 0x80-0x9F).
    The second condition guarantees that re-encoding the text as Windows-1252 is lossless.
    Text that already contains real Cyrillic (or any other character outside Windows-1252)
    is therefore never reported, whatever punctuation it contains, because converting it
    would replace those characters with '?'.

    NOTE(review): genuine Western European text such as "Café" is still reported as needing
    correction; this heuristic cannot tell it apart from mis-decoded Cyrillic.

    .PARAMETER text
    The tag value to inspect. Null or empty input yields $false.

    .OUTPUTS
    System.Boolean
    #>
    param (
        [string]$text
    )

    if ([string]::IsNullOrEmpty($text)) {
        return $false
    }

    # Code points outside U+0000-U+00FF that Windows-1252 assigns to bytes 0x80-0x9F.
    # Written as numbers so the check does not depend on how the script file itself is encoded.
    $cp1252Extras = @(
        0x20AC, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030,
        0x0160, 0x2039, 0x0152, 0x017D, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
        0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x017E, 0x0178
    )

    $sawSuspectChar = $false
    foreach ($ch in $text.ToCharArray()) {
        $codePoint = [int]$ch

        # Plain ASCII is identical in Windows-1251 and Windows-1252: neither evidence for nor against
        if ($codePoint -le 0x7F) {
            continue
        }

        if ($codePoint -le 0xFF -or $cp1252Extras -contains $codePoint) {
            # Could be a Windows-1251 byte that was displayed through Windows-1252
            $sawSuspectChar = $true
            continue
        }

        # A character Windows-1252 cannot encode (e.g. real Cyrillic): the text is not
        # Windows-1252 mojibake and converting it would lose data
        return $false
    }

    return $sawSuspectChar
}
# Returns the tag value with its encoding repaired when Test-NeedsEncodingCorrection reports
# it as mis-decoded; blank values and values that need no repair are returned unchanged.
function Repair-TagText {
    param (
        # Left untyped on purpose so that a $null tag value stays $null instead of becoming ''
        $text
    )
    if ([string]::IsNullOrWhiteSpace($text) -or -not (Test-NeedsEncodingCorrection -text $text)) {
        return $text
    }
    # Recover the original bytes through Windows-1252, then decode them as Windows-1251
    $rawBytes = [System.Text.Encoding]::GetEncoding(1252).GetBytes($text)
    return [System.Text.Encoding]::GetEncoding(1251).GetString($rawBytes)
}

# Define the directory containing the MP3 files
$directoryPath = (Get-Location).Path

# Get all MP3 files in the directory.
# -LiteralPath keeps '[' and ']' in the directory name from being treated as wildcards,
# -File skips directories, and @() guarantees an array so .Count is always meaningful.
$mp3Files = @(Get-ChildItem -LiteralPath $directoryPath -Filter *.mp3 -File)

# Resolve the optional cover image once, to a full path: .NET methods resolve relative paths
# against the process working directory, which can differ from the PowerShell location
# that Test-Path uses.
$picturePath = $null
if (-not [string]::IsNullOrWhiteSpace($AllPicture) -and (Test-Path -LiteralPath $AllPicture -PathType Leaf)) {
    $picturePath = (Resolve-Path -LiteralPath $AllPicture).ProviderPath
}

foreach ($file in $mp3Files) {
    # Reset per iteration so a failed open can never reuse the previous file's object
    $mp3 = $null
    try {
        # Use TagLib# to read the MP3 file
        $mp3 = [TagLib.File]::Create($file.FullName)

        # Show the current file name and the total number of files; the trailing carriage
        # return moves the cursor back so the next file overwrites this line
        Write-Host $file.Name, "of total:", $mp3Files.Count, "`r" -NoNewline

        # Title has no override parameter: only its encoding is repaired
        $titleCorrected = Repair-TagText -text $mp3.Tag.Title

        # Comment: -AllComment wins, otherwise repair the existing value
        if ([string]::IsNullOrEmpty($AllComment)) {
            $commentCorrected = Repair-TagText -text $mp3.Tag.Comment
        }
        else {
            $commentCorrected = $AllComment
        }

        # Performers: -AllArtist wins (split on ", " as before); otherwise each existing
        # performer is repaired on its own, so a name that itself contains ", " is not
        # broken into several performers when written back
        if ([string]::IsNullOrEmpty($AllArtist)) {
            $performersCorrected = @(foreach ($performer in $mp3.Tag.Performers) { Repair-TagText -text $performer })
        }
        else {
            $performersCorrected = $AllArtist -split ", "
        }

        # Album: -AllAlbum wins, otherwise repair the existing value
        if ([string]::IsNullOrEmpty($AllAlbum)) {
            $albumCorrected = Repair-TagText -text $mp3.Tag.Album
        }
        else {
            $albumCorrected = $AllAlbum
        }

        # Attach the cover image when -AllPicture pointed to an existing file
        if ($picturePath) {
            $mp3.Tag.Pictures = [TagLib.Picture]::CreateFromPath($picturePath)
        }

        # Update tags with corrected values
        $mp3.Tag.Title = $titleCorrected
        $mp3.Tag.Performers = [string[]]$performersCorrected
        $mp3.Tag.Album = $albumCorrected
        $mp3.Tag.Comment = $commentCorrected

        # Save the changes
        $mp3.Save()
    }
    catch {
        # Report the failing file and carry on with the remaining ones
        Write-Warning "Skipping '$($file.Name)': $($_.Exception.Message)"
    }
    finally {
        # Dispose the MP3 object to free resources, even when something above failed
        if ($null -ne $mp3) {
            $mp3.Dispose()
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment