Skip to content

Instantly share code, notes, and snippets.

@uyriq
Last active July 10, 2024 14:53
Show Gist options
  • Save uyriq/338013336653b2b7c676a477046fc75e to your computer and use it in GitHub Desktop.
Save uyriq/338013336653b2b7c676a477046fc75e to your computer and use it in GitHub Desktop.
fix1252/1251 to UTF8 encoding for idv3tags
# .description: "This script reads the ID3 tags of MP3 files in a directory, corrects the encoding from WINDOWS-1251 (incorrectly displayed as WINDOWS-1252) to UTF-8, and saves the corrected tags back to the files."
# .prerequisites: "To run this script, you must first obtain TagLibSharp.dll. This can be done by downloading it from https://nuget.org/packages/TagLibSharp/2.3.0 or by compiling the sources available at https://github.com/mono/taglib-sharp."
# .how_to_use: "Execute this script in the directory containing the MP3 files you wish to correct. Ensure TagLibSharp.dll is accessible to the script, adjusting the library loading path as necessary."
# You can optionally pass -AllArtist, -AllAlbum, -AllPicture, or -AllComment to set the corresponding tag on every MP3 file that is processed.
param(
# Script parameters. All are optional; each defaults to $null in the declaration.
# The processing loop treats a null or empty value as "not supplied" and falls back to the
# tag value already stored in each file.
# -AllArtist: performer string applied to every file; it is split on ", " before being stored.
[string]$AllArtist = $null,
# -AllAlbum: album name applied to every file.
[string]$AllAlbum = $null,
# -AllPicture: path to an image file; it is attached to each file only if the path exists as a file.
[string]$AllPicture = $null,
# -AllComment: comment text applied to every file.
[string]$AllComment = $null
)
# Load the TagLib# assembly from the current directory.
Add-Type -Path ".\TagLibSharp.dll"
# Any edition other than PowerShell Core (version 6 and above) first loads the
# code pages assembly from the current directory.
if ($PSVersionTable.PSEdition -ne 'Core') {
    # obtain binary package from https://nuget.info/packages/System.Text.Encoding.CodePages/ (netstandard 2.0)
    Add-Type -Path ".\System.Text.Encoding.CodePages.dll"
}
# In every edition, register the code page provider to ensure Windows-1251 is available.
[System.Text.Encoding]::RegisterProvider([System.Text.CodePagesEncodingProvider]::Instance)
function Test-NeedsEncodingCorrection {
    <#
    .SYNOPSIS
    Reports whether a tag string contains characters that indicate mis-decoded text.

    .DESCRIPTION
    Returns $true when the text contains at least one character that is neither
    ASCII (U+0000-U+007F) nor in the Unicode Cyrillic block (U+0400-U+04FF).
    Cyrillic text stored as Windows-1251 but decoded as Windows-1252 shows up as
    Latin-1 letters and typographic symbols, all of which fall outside both ranges.

    The whole ASCII range and the whole Cyrillic block are accepted on purpose:
    the caller re-encodes flagged text through Windows-1252, which would replace
    correctly decoded Cyrillic with '?'. Ordinary punctuation (':', '?', '/', ...)
    or letters such as U+0401/U+0451 must therefore never flag a string by themselves.

    .PARAMETER text
    The tag value to inspect. $null is coerced to an empty string by the [string] type.

    .OUTPUTS
    System.Boolean. $true if the text looks mis-decoded, otherwise $false
    (including for empty input).
    #>
    param (
        [string]$text
    )
    # \u escapes keep the pattern independent of the encoding the script file is read with.
    # -cmatch (case-sensitive) prevents case folding from equating a non-ASCII
    # character with an ASCII letter inside the negated class.
    return $text -cmatch '[^\u0000-\u007F\u0400-\u04FF]'
}
# Main processing: for every MP3 file in the current directory, read its tags,
# repair values that Test-NeedsEncodingCorrection flags (or substitute the
# -All* overrides), write the tags back and save the file.

# Define the directory containing the MP3 files
$directoryPath = (Get-Location).Path
# Get all MP3 files in the directory. -File skips directories whose name ends in .mp3;
# @() guarantees an array so .Count is reliable for zero or one match.
$mp3Files = @(Get-ChildItem -Path $directoryPath -Filter *.mp3 -File)

# The two encodings are loop-invariant, so look them up once.
# Text is turned back into bytes with 1252 and re-read as 1251.
$misreadEncoding = [System.Text.Encoding]::GetEncoding(1252)
$actualEncoding = [System.Text.Encoding]::GetEncoding(1251)

# Resolve the picture path once, to a full file-system path. Test-Path works
# relative to the PowerShell location, whereas .NET file APIs resolve relative
# paths against the process working directory, so a relative path is made absolute here.
$picturePath = $null
if (-not [string]::IsNullOrWhiteSpace($AllPicture) -and (Test-Path -LiteralPath $AllPicture -PathType Leaf)) {
    $picturePath = (Resolve-Path -LiteralPath $AllPicture).ProviderPath
}

# Returns the tag value re-decoded from Windows-1252 bytes as Windows-1251 when
# Test-NeedsEncodingCorrection flags it; otherwise returns the value unchanged.
# The parameter is untyped on purpose so that $null stays $null.
# Reads $misreadEncoding / $actualEncoding from the enclosing script scope.
function Repair-TagText {
    param (
        $text
    )
    if (-not [string]::IsNullOrWhiteSpace($text) -and (Test-NeedsEncodingCorrection -text $text)) {
        return $actualEncoding.GetString($misreadEncoding.GetBytes($text))
    }
    return $text
}

foreach ($file in $mp3Files) {
    # Reset per iteration so a failed open can never leave the previous file's object in use.
    $mp3 = $null
    try {
        # Use TagLib# to read the MP3 file
        $mp3 = [TagLib.File]::Create($file.FullName)

        # Show the current file name and the total number of files; the trailing
        # carriage return moves the cursor back so the next line overwrites this one.
        Write-Host $file.Name, "of total:", $mp3Files.Count, "`r" -NoNewline

        $titleCorrected = Repair-TagText $mp3.Tag.Title

        if (-not [string]::IsNullOrEmpty($AllComment)) {
            $commentCorrected = $AllComment
        }
        else {
            $commentCorrected = Repair-TagText $mp3.Tag.Comment
        }

        if (-not [string]::IsNullOrEmpty($AllArtist)) {
            # An explicit override may list several artists separated by ", ".
            $performersCorrected = $AllArtist -split ", "
        }
        else {
            # Repair each performer on its own. Joining and re-splitting on ", " would
            # break names that contain ", " and would force conversion of correctly
            # encoded performers whenever a single one is mis-decoded.
            $performersCorrected = @(foreach ($performer in $mp3.Tag.Performers) { Repair-TagText $performer })
        }

        if (-not [string]::IsNullOrEmpty($AllAlbum)) {
            $albumCorrected = $AllAlbum
        }
        else {
            $albumCorrected = Repair-TagText $mp3.Tag.Album
        }

        # Attach the picture only when -AllPicture pointed at an existing file.
        if ($picturePath) {
            $mp3.Tag.Pictures = [TagLib.Picture]::CreateFromPath($picturePath)
        }

        # Update tags with corrected values
        $mp3.Tag.Title = $titleCorrected
        $mp3.Tag.Performers = [string[]]$performersCorrected
        $mp3.Tag.Album = $albumCorrected
        $mp3.Tag.Comment = $commentCorrected

        # Save the changes
        $mp3.Save()
    }
    catch {
        # A file that cannot be opened or saved is reported and skipped; the remaining files are still processed.
        Write-Warning "Skipping '$($file.FullName)': $($_.Exception.Message)"
    }
    finally {
        # Dispose the MP3 object to free resources, also when an error occurred.
        if ($null -ne $mp3) {
            $mp3.Dispose()
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment