Last active May 22, 2022 15:18
This script checks for invalid books in the Calibre library by looking for books with empty covers, and then searches the Downloads folder for the original book file.
# Work from the portable Calibre program directory; the relative paths used below
# (.\calibredb.exe, "..\Calibre Library\", .\formatLessBooks.xml) are resolved against it.
Set-Location C:\CalibrePortable\Calibre\
# Prefix map passed to Select-Xml via -Namespace for the 'dc:' XPath queries below.
# NOTE(review): both namespace URIs are empty strings and no closing brace for this
# hashtable is visible in this view - confirm against the original script.
$calibreNS = @{ 'dc' = ''
'calibre' = ''
# Run the calibredb search below, split its output on commas and cast the pieces to an int array of book ids.
$formatLessBookIds = [int[]](&.\calibredb.exe search 'formats:false and (identifiers:"=isbn:")' --library-path="..\Calibre Library\" | ForEach-Object { $_ -split ',' })
# Load the result cached by a previous run (the file written by Export-Clixml further down).
$formatLessBooks = Import-Clixml -Path .\formatLessBooks.xml
# Rebuild the cache when nothing usable was loaded or the stored Hash differs from the hash code of the current id array.
# NOTE(review): GetHashCode() on a .NET array is presumably not derived from its contents, so the stored
# value may never match on a later run - confirm. The braces delimiting this if-body are not visible here.
if($null -eq $formatLessBooks -or $null -eq $formatLessBooks.Hash -or $formatLessBooks.Hash -ne $formatLessBookIds.GetHashCode())
# New cache object: Hash, plus Books = the OPF metadata (as [xml]) of every id for which a second search
# (formats:true, same dc:title, dc:creator values joined with ' & ') yields only empty/whitespace output.
# stderr of that second search is discarded (2>$null).
$formatLessBooks = @{ Hash = ($formatLessBookIds.GetHashCode());Books = ($formatLessBookIds | ForEach-Object { [xml](&.\calibredb.exe show_metadata --as-opf $PSItem --library-path="..\Calibre Library\") } | Where-Object { [string]::IsNullOrWhiteSpace((&.\calibredb.exe search formats:true title:`"$(Select-Xml -Xml $PSItem -Namespace $calibreNS -XPath '//dc:title' | ForEach-Object { $_.Node.InnerXml })`" author:`"$((Select-Xml -Xml $PSItem -Namespace $calibreNS -XPath '//dc:creator' | ForEach-Object { $_.Node.InnerXml }) -join ' & ')`" --library-path="..\Calibre Library\" 2>$null | ForEach-Object { $_ -split ',' } ))})}
# Persist the (possibly rebuilt) cache for the next run.
Export-Clixml -InputObject $formatLessBooks -Path .\formatLessBooks.xml
# Named mutex (created without initial ownership) used further down to serialise the calibredb add_format calls.
$mutex = New-Object System.Threading.Mutex -ArgumentList @($false, 'calibre-portable-update-metadata')
# Process every cached book's metadata document in parallel, at most 16 at a time.
# NOTE(review): the closing brace of this script block and the braces of the if/while bodies below
# are not visible in this view.
$formatLessBooks.Books | ForEach-Object -ThrottleLimit 16 -Parallel {
# Bring the outer variables into the parallel script block.
$mutex = $using:mutex
$calibreNS = $using:calibreNS
# Set the working directory again inside the parallel script block
# (presumably because it does not inherit the caller's location - confirm).
Set-Location C:\CalibrePortable\Calibre\
# Pull title, ISBN and Calibre id out of the OPF document; the identifiers are told apart by their 'scheme' attribute.
$title = Select-Xml -Xml $PSItem -Namespace $calibreNS -XPath '//dc:title' | ForEach-Object { $_.Node.InnerXml }
$isbn = Select-Xml -Xml $PSItem -Namespace $calibreNS -XPath '//dc:identifier' | Where-Object -FilterScript { $_.Node.scheme -eq 'ISBN' } | ForEach-Object { $_.Node.'#text' }
$id = Select-Xml -Xml $PSItem -Namespace $calibreNS -XPath '//dc:identifier' | Where-Object -FilterScript { $_.Node.scheme -eq 'Calibre' } | ForEach-Object { $_.Node.'#text' }
# Only books that have both an ISBN and a Calibre id are looked up.
if (-not [String]::IsNullOrWhiteSpace( $isbn ) -and -not [String]::IsNullOrWhiteSpace( $id ))
# Base delay in seconds; it is doubled before each retry sleep below, so the first sleep is 6 seconds.
$sleepTime = 3
# Search URL for the ISBN.
# NOTE(review): the string starts directly with "$isbn" - the scheme/host part appears to be missing in this view.
$uri = "$isbn&open=0&res=25&view=simple&phrase=1&column=identifier&sort=year&sortmode=DESC"
$request = Invoke-WebRequest -Uri $uri 2>$null
# Retry until the response status is 200, doubling the sleep each time
# (the parenthesised assignment passes the updated value to -Seconds).
while ($request.StatusCode -ne 200)
Start-Sleep -Seconds ($sleepTime *= 2)
$request = Invoke-WebRequest -Uri $uri 2>$null
# Load the AngleSharp assembly: take the installed package's source directory, search it recursively for *.dll,
# keep those whose path matches '*standard*', and Add-Type the last one (load errors are silenced).
Get-Package -Name 'AngleSharp' | ForEach-Object { Split-Path $_.Source } | Get-ChildItem -Filter '*.dll' -Recurse | Where-Object { $_ -Like '*standard*' } | Select-Object -Last 1 | ForEach-Object { Add-Type -Path $_ -ErrorAction SilentlyContinue -Verbose }
# Parse the search result page.
$parser = [AngleSharp.Html.Parser.HtmlParser]::new()
$parsedContent = $parser.ParseDocument($request.Content)
# Formats already handled for this book; HashSet.Add returns $false for a repeat, so each format is processed once.
$formats = New-Object -TypeName 'System.Collections.Generic.HashSet[string]'
# Walk the rows of the result table, skipping the first row.
$parsedContent.QuerySelectorAll('body > table.c > tbody > tr') | Select-Object -Skip 1 | ForEach-Object {
# The 9th cell is used as the format name.
$format = $_.QuerySelector('td:nth-child(9)').InnerHtml
if (-not([String]::IsNullOrWhiteSpace($format)) -and $formats.Add($format))
# Follow the link in the 10th cell and read the download link ('#download > h2 > a') from that page.
$requestDL = Invoke-WebRequest -Uri ($_.QuerySelector('td:nth-child(10) > a').GetAttribute('href'))
$parsedDLContent = $parser.ParseDocument($requestDL.Content)
$uri = [uri]::new(($parsedDLContent.QuerySelector('#download > h2 > a').href))
# The 4th URI segment (slashes stripped) is compared below against the file's MD5 hash.
$md5 = $uri.segments[3] -replace '/', ''
# Local target in the temp directory: "<calibre id>.<extension of the last URI segment>".
$filename = Join-Path $env:TEMP "$id.$($uri.segments[-1] -split '\.' | Select-Object -Last 1 )"
# Download only when the file is missing or its MD5 differs from the value taken from the URL.
if (-not(Test-Path $filename -PathType Leaf) -or ($md5.ToUpperInvariant() -ne (Get-FileHash -Path $filename -Algorithm MD5).Hash))
# NOTE(review): with -OutFile and no -PassThru, Invoke-WebRequest presumably emits no response object,
# in which case $request.StatusCode would never equal 200 here - confirm.
$request = Invoke-WebRequest -Uri $uri -OutFile $filename -Resume 2>$null
while ($request.StatusCode -ne 200)
Start-Sleep -Seconds ($sleepTime *= 2)
$request = Invoke-WebRequest -Uri $uri -OutFile $filename -Resume 2>$null
# Attach the downloaded file to the book while holding the mutex, so only one calibredb add_format runs at a time.
try {
$mutex.WaitOne() | Out-Null
$ebookFile = Get-Item $filename
&.\calibredb.exe add_format $id "$ebookFile" --library-path="..\Calibre Library\"
# Emit a small summary table for this book.
@{Id=$id; Ebook=$ebookFile; Title=$title; Filename=$filename} | Format-Table -AutoSize -Wrap | Write-Output
} finally {
# Always release the mutex, even when add_format fails.
$mutex.ReleaseMutex() | Out-Null
# Library location used by the matching stage below (Get-ChildItem -Path $calibreLibraryPath).
$calibreLibraryPath = "$env:USERPROFILE\Calibre Library"
#Install pre-requisite module Communary.PASM for approximate string matching if necessary
# NOTE(review): the closing braces of this section are not visible in this view.
if (-not (Get-Module -ListAvailable -Name Communary.PASM)) {
$m="Installing missing approximate string matching module Communary.PASM"
Write-Host $m
# NOTE(review): in this view Write-Log and $log are only defined further down the file,
# after this call - confirm the original ordering.
Write-Log -Message $m -Level "WARN" -logfile $log
# Check to see if we are currently running "as Administrator"
# NOTE(review): $myWindowsPrincipal and $adminRole are not assigned anywhere in the visible source.
if ($myWindowsPrincipal.IsInRole($adminRole))
# We are running "as Administrator" - so change the title and background color to indicate this
$Host.UI.RawUI.WindowTitle = $myInvocation.MyCommand.Definition + "(Elevated)"
$Host.UI.RawUI.BackgroundColor = "DarkBlue"
Install-Module Communary.PASM
# We are not running "as Administrator" - so relaunch as administrator
# Create a new process object that starts PowerShell
$newProcess = new-object System.Diagnostics.ProcessStartInfo "PowerShell";
# Specify the current script path and name as a parameter
$newProcess.Arguments = $myInvocation.MyCommand.Definition;
# Indicate that the process should be elevated
$newProcess.Verb = "runas";
# Start the new process
# NOTE(review): no statement that actually starts $newProcess is visible here, and the exit below is commented out.
# Exit from the current, unelevated, process
# exit
#Install pre-requisite pdfinfo by XpdfReader if necessary
# where.exe prints nothing when pdfinfo.exe cannot be found, which makes the condition below true.
# NOTE(review): the braces closing the if/else bodies are not visible in this view.
if (-not (where.exe pdfinfo.exe)){
$m="Installing missing pdfinfo program by XpdfReader"
Write-Host $m
Write-Log -Message $m -Level "WARN" -logfile $log
# Fetch a page and pick the link whose href matches 'xpdf-tools-win.*\.zip$'.
# NOTE(review): the -Uri value is an empty string in this view - the download page URL appears to be missing.
$pdfinfoZipDl = $($(iwr -Uri '' -Method Get).ParsedHtml.links | foreach {$_.href | Select-String -Pattern 'xpdf-tools-win.*\.zip$' }).ToString()
# Download the zip and copy its items out through the Shell.Application COM object (CopyHere with option value 16).
# NOTE(review): the path literals on this line look truncated (no file name after "$env:TEMP\", stray quote in
# '"$env:TEMP') - confirm against the original script.
(New-Object Net.WebClient).DownloadFile($pdfinfoZipDl,"$env:TEMP\");(new-object -com shell.application).namespace('"$env:TEMP').CopyHere((new-object -com shell.application).namespace("$env:TEMP\").Items(),16)
# Point at pdfinfo.exe in the bin64 folder of the "xpdf-tools-win*" directory found under the temp directory.
$pdfInfoLoc = "$env:TEMP\" + $(Get-ChildItem -Path $env:TEMP -Name "xpdf-tools-win*" -Directory) + "\bin64\pdfinfo.exe"
else {
# pdfinfo.exe was found by where.exe - use the reported location.
$pdfInfoLoc = where.exe pdfinfo.exe;
# Temporary file that is passed to Write-Log as -logfile throughout the script.
$log = New-TemporaryFile
<#
.SYNOPSIS
Writes one timestamped, tab-separated log line: "<timestamp>`t<level>`t<message>".
.PARAMETER Level
Severity label placed in the second column. Defaults to "INFO".
.PARAMETER Message
Text placed in the third column.
.PARAMETER logfile
Optional file to append the line to. When omitted (or otherwise falsy) the line
is written to the output stream instead.
.OUTPUTS
The formatted line when no logfile is given; nothing otherwise.
#>
Function Write-Log {
    Param(
        [string]$Level = "INFO",
        [string]$Message,
        # Left untyped on purpose: callers pass the object returned by New-TemporaryFile.
        $logfile
    )

    $Stamp = (Get-Date).toString("yyyy/MM/dd HH:mm:ss")
    $Line = "$Stamp`t$Level`t$Message"
    If($logfile) {
        Add-Content $logfile -Value $Line
    }
    Else {
        Write-Output $Line
    }
}
# Threshold used by the matching loop below, where scores are compared against (1 - $confidence).
$confidence = 0.90
# Prefix map for the Select-Xml XPath queries below.
# NOTE(review): both namespace URIs are empty strings in this view - confirm against the original script.
$calibreNS = @{ "dc" = ""; "calibre" = "" };
# NOTE(review): $webNS is not referenced anywhere in the visible source.
$webNS = @{'xmlns'=""}
# Every file under the user's Downloads folder (recursively) that has one of the listed e-book extensions.
$files = Get-ChildItem -Path $env:USERPROFILE\Downloads -Recurse -File -Include *.AZW, *.AZW3, *.AZW4, *.CBZ, *.CBR, *.CBC, *.CHM, *.DJVU, *.DOCX, *.EPUB, *.FB2, *.FBZ, *.HTML, *.HTMLZ, *.LIT, *.LRF, *.MOBI, *.ODT, *.PDF, *.PRC, *.PDB, *.PML, *.RB, *.RTF, *.SNB, *.TCR, *.TXT, *.TXTZ ; #has $files.Count many elements
# Ids returned by the calibredb search "cover:False", split from the comma-separated output.
$ids = ([string](calibredb search cover:False)).Split(",");
# Candidate replacement files for the current book, keyed by their similarity score.
$candidates = New-Object 'System.Collections.Generic.Dictionary[Float,System.IO.FileSystemInfo]'
# For every book id found above: score each downloaded file against the book's name, pick a candidate,
# check it with pdfinfo, then either attach it to the book or remove the book from Calibre.
# NOTE(review): the closing braces of the loops and if-bodies below are not visible in this view.
foreach ($id in $ids) {
Write-Progress -Activity "Trying to find good copies of broken books" -Status "$([int16]$(100*$ids.IndexOf($id)/$ids.Count))% Complete:" -PercentComplete (100*$ids.IndexOf($id)/$ids.Count);
# Temporary files that receive pdfinfo's stderr and stdout further down.
$err = New-TemporaryFile
$out = New-TemporaryFile
# Item(s) inside the library whose name ends with "(<id>)".
$calibreBookFilename = Get-ChildItem -Path $calibreLibraryPath -Include "*($id)" -Recurse
# Read the book's OPF metadata and extract creator and title.
$book = [xml](calibredb.exe show_metadata --as-opf $id)
$author = Select-Xml -Xml $book -Namespace $calibreNS -XPath "//dc:creator" | ForEach-Object { $_.Node.Innerxml }
$title = Select-Xml -Xml $book -Namespace $calibreNS -XPath "//dc:title" | ForEach-Object { $_.Node.Innerxml }
# Name that the download file names are compared against.
# NOTE(review): when the title is not "untitled" the first part evaluates to "- $author", not the title,
# so the title never enters the composite name - confirm whether $title was intended.
$compositeName = $($(if($title.Equals("untitled")){$calibreBookFilename.Name}else{"- $author"}) + $(if($author.Equals("Unknown")){""}else{"- $author"}))
#if($title -ieq 'untitled' -and $author -ieq 'Unknown' -and $calibreBookFilename.Name -ieq 'untitled - Unknown')
foreach ($file in $files) {
Write-Progress -Activity "Looking for $title by $author" -Status "$([int16]$(100*$files.IndexOf($file)/$files.Count))% Complete:" -PercentComplete (100*$files.IndexOf($file)/$files.Count);
# Score = 1 - Jaccard distance between the composite name and the file name up to its first dot (case-sensitive).
$compar = 1 - $(Get-JaccardDistance -a $compositeName -b $file.Name.Split(".")[0] -CaseSensitive)
# NOTE(review): this compares the file's base name (a string) with 0; $compar may have been intended - confirm.
if ($file.Name.Split(".")[0] -gt 0) {
$candidates.Add($compar,$file) | Out-Null
# Dictionary.Add throws ArgumentException when the key already exists, i.e. another file had the same score;
# that case is only logged. NOTE(review): the matching 'try' is not visible in this view.
catch [ArgumentException]{
$m="'" + $file.Name.Split('.')[0] + "' had the same confidence as '" + $candidates[$compar].Name.Split('.')[0] + "'"
#Write-Host $m
Write-Log -Message $m -Level "DEBUG" -logfile $log
# NOTE(review): $replacement is only assigned further down, so on the first pass the status text below has no file name.
if (-not $candidates.GetEnumerator().Value -ceq $null -and $compar -ge (1-$confidence)) {
Write-Progress -Activity "Looking for $title by $author" -Status "Found $($replacement.Name) at $compar% confidence" -PercentComplete 100
Clear-Variable compar
$m="Looking for $title by $author"
Write-Host $m
Write-Log -Message $m -Level "INFO" -logfile $log
# Take the first entry after sorting the candidates by key in ascending order.
# NOTE(review): with the key being 1 - distance, ascending order puts the smallest score first - confirm
# that this is the intended "best" guess.
$bestGuess = $(($candidates.GetEnumerator() | sort -Property Key))[0]
$compar = $bestGuess.Key
$replacement = $bestGuess.Value;
$m="Found $($replacement.Name) at $compar% confidence"
Write-Host $m -NoNewLine -ForegroundColor DarkGreen
Write-Log -Message $m -Level "INFO" -logfile $log
# Flag scores below (1 - $confidence) as low confidence.
if($compar -lt (1-$confidence)){
$m=" having low confidence `($compar`)"
Write-Host $m -ForegroundColor DarkYellow
Write-Log -Message $m -Level "INFO" -logfile $log
# Run pdfinfo -meta on the candidate, capturing stderr/stdout in the temp files created above.
# NOTE(review): Start-Process is called without -Wait, so the redirected files may still be
# incomplete when they are read on the next line - confirm.
Start-Process -FilePath $pdfInfoLoc -ArgumentList "-meta `"$($replacement.FullName)`"" -NoNewWindow -RedirectStandardError $err -RedirectStandardOutput $out
# Accept the candidate when no "Error"/"Warning" match is found for $err and the stdout file has content.
if ( $(Select-String -InputObject $err -Pattern "(Error|Warning)") -ceq $null -and -not ($(Get-Content $out) -ceq $null)) {
$m="`nReplacing old file with new one"
Write-Host $m -ForegroundColor Green
Write-Log -Message $m -Level "INFO" -logfile $log
Write-Host $out -ForegroundColor Green
# Attach the replacement file to the existing Calibre book.
calibredb add_format $id "$($replacement.FullName)"
} else {
# The candidate failed the pdfinfo check: report it and remove the book from Calibre.
$m="`nThe file $($replacement.FullName) was corrupt.`nNo valid file found for $title by $author.`nRemoving book with ID $id from Calibre"
Write-Host $m -ForegroundColor DarkRed -BackgroundColor Black
Write-Log -Message $m -Level "WARN" -logfile $log
calibredb.exe remove $id
#else {
#$m="`nThe book with ID $id was corrupt, had no title or author. Its filename is $($calibreBookFilename.Name). It is thus impossible to replace.`nRemoving book with ID $id from Calibre"
#Write-Host $m -ForegroundColor DarkRed -BackgroundColor Black
#Write-Log -Message $m -Level "INFO" -logfile $log
#calibredb.exe remove $id
# Per-book cleanup: delete the temp files and start the next book with an empty candidate dictionary.
Remove-Item $out, $err -Force
Clear-Variable candidates, err, out
$candidates = New-Object 'System.Collections.Generic.Dictionary[Float,System.IO.FileSystemInfo]'
