Skip to content

Instantly share code, notes, and snippets.

@mavaddat
Last active May 24, 2024 08:08
Show Gist options
  • Save mavaddat/e09c5feafc1dd464502c1cc6aff13765 to your computer and use it in GitHub Desktop.
Save mavaddat/e09c5feafc1dd464502c1cc6aff13765 to your computer and use it in GitHub Desktop.
Small utility application to update Saxon-HE from Maven Central
using System;
using System.IO;
using System.Net.Http;
using System.Security.Cryptography;
using System.Xml;
namespace SaxonicaDownloader
{
    /// <summary>
    /// Console utility that downloads the latest Saxon-HE JAR files from Maven Central,
    /// checks every download against the checksum files published next to it, and moves
    /// the verified JARs into <c>%LOCALAPPDATA%\Programs\Saxonica</c>.
    /// </summary>
    class Program
    {
        // A single client is shared by every request in the run. Creating one HttpClient
        // per request leaks handles and can exhaust sockets.
        static readonly HttpClient SharedClient = new HttpClient();

        /// <summary>
        /// Entry point: resolves the latest Saxon-HE version from <c>maven-metadata.xml</c>,
        /// then downloads, verifies and installs every JAR listed for that version.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="InvalidDataException">
        /// The metadata has no latest version, the listing has no JAR links, or a checksum does not match.
        /// </exception>
        static void Main(string[] args)
        {
            const string mavenMetadataUrl = "https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE/maven-metadata.xml";
            const string mavenRepositoryUrl = "https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE/";

            // Download and verify maven-metadata.xml
            string mavenMetadataPath = DownloadAndVerifyFile(mavenMetadataUrl, Path.GetTempPath());

            // Parse maven-metadata.xml to get the latest version
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.Load(mavenMetadataPath);
            XmlNode latestNode = xmlDoc.SelectSingleNode("/metadata/versioning/latest");
            if (latestNode == null || string.IsNullOrWhiteSpace(latestNode.InnerText))
            {
                throw new InvalidDataException("maven-metadata.xml does not contain a /metadata/versioning/latest value.");
            }
            string latestVersion = latestNode.InnerText.Trim();

            // Fetch the directory listing for that version
            string versionUrl = $"{mavenRepositoryUrl}{latestVersion}/";
            string htmlContent;
            using (HttpResponseMessage response = SharedClient.GetAsync(versionUrl).GetAwaiter().GetResult())
            {
                response.EnsureSuccessStatusCode();
                htmlContent = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
            }

            // Parse HTML content to get file links
            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(htmlContent);
            var fileLinks = htmlDoc.DocumentNode.SelectNodes("//a[contains(@href, '.jar')]");
            if (fileLinks == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                throw new InvalidDataException($"No JAR links were found at {versionUrl}");
            }

            string destinationFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "Programs", "Saxonica");
            Directory.CreateDirectory(destinationFolder);

            foreach (HtmlAgilityPack.HtmlNode link in fileLinks)
            {
                string href = link.GetAttributeValue("href", string.Empty);

                // The XPath also matches the companion files (".jar.asc", ".jar.md5", ...).
                // Only the JARs themselves are installed; the checksum files are fetched
                // by VerifyFileHashes.
                if (!href.EndsWith(".jar", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                string fileUrl = $"{versionUrl}{href}";

                // Download and verify file
                string filePath = DownloadAndVerifyFile(fileUrl, Path.GetTempPath());

                // Move the file to the destination folder, replacing a copy left by an
                // earlier run (File.Move alone throws when the target already exists).
                // The bare file name is used so an href can never escape the folder.
                string destinationPath = Path.Combine(destinationFolder, Path.GetFileName(filePath));
                if (File.Exists(destinationPath))
                {
                    File.Delete(destinationPath);
                }
                File.Move(filePath, destinationPath);
            }
        }

        /// <summary>
        /// Downloads <paramref name="fileUrl"/> into <paramref name="downloadFolder"/> and
        /// verifies it against the published checksum files.
        /// </summary>
        /// <param name="fileUrl">Absolute URL of the file to download.</param>
        /// <param name="downloadFolder">Existing folder that receives the file.</param>
        /// <returns>Full path of the downloaded file.</returns>
        /// <exception cref="InvalidDataException">A checksum does not match.</exception>
        static string DownloadAndVerifyFile(string fileUrl, string downloadFolder)
        {
            string fileName = Path.GetFileName(fileUrl);
            string filePath = Path.Combine(downloadFolder, fileName);

            // Download the file. GetAwaiter().GetResult() surfaces the real exception
            // instead of an AggregateException wrapper.
            byte[] fileBytes = SharedClient.GetByteArrayAsync(fileUrl).GetAwaiter().GetResult();
            File.WriteAllBytes(filePath, fileBytes);

            // Verify file hashes
            VerifyFileHashes(fileUrl, filePath);
            return filePath;
        }

        /// <summary>
        /// Compares the MD5, SHA-1, SHA-256 and SHA-512 digests of a local file with the
        /// checksum files found at <c>{fileUrl}.{algorithm}</c>.
        /// </summary>
        /// <param name="fileUrl">URL the file was downloaded from.</param>
        /// <param name="filePath">Path of the local copy to hash.</param>
        /// <exception cref="InvalidDataException">A remote and local digest differ.</exception>
        static void VerifyFileHashes(string fileUrl, string filePath)
        {
            string[] hashAlgorithms = { "md5", "sha1", "sha256", "sha512" };

            // Read the file once; every algorithm hashes the same bytes.
            byte[] fileBytes = File.ReadAllBytes(filePath);

            foreach (string hashAlgorithm in hashAlgorithms)
            {
                string hashUrl = $"{fileUrl}.{hashAlgorithm}";
                string remoteHash = ExtractHash(SharedClient.GetStringAsync(hashUrl).GetAwaiter().GetResult());

                string localHash;
                using (HashAlgorithm hashAlg = CreateHashAlgorithm(hashAlgorithm))
                {
                    localHash = BitConverter.ToString(hashAlg.ComputeHash(fileBytes)).Replace("-", string.Empty).ToLowerInvariant();
                }

                if (!string.Equals(remoteHash, localHash, StringComparison.OrdinalIgnoreCase))
                {
                    throw new InvalidDataException($"Hash mismatch for {hashAlgorithm}: Remote={remoteHash}, Local={localHash}");
                }
            }
        }

        /// <summary>
        /// Returns the digest from the text of a checksum file: the first
        /// whitespace-delimited token, so trailing newlines and a
        /// "&lt;hash&gt;  &lt;file name&gt;" layout are both tolerated.
        /// </summary>
        static string ExtractHash(string checksumFileContent)
        {
            string[] tokens = (checksumFileContent ?? string.Empty).Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            return tokens.Length > 0 ? tokens[0] : string.Empty;
        }

        /// <summary>
        /// Maps a Maven checksum extension to its hash implementation. The typed factory
        /// methods are used because <c>HashAlgorithm.Create(string)</c> is obsolete and
        /// may return null.
        /// </summary>
        static HashAlgorithm CreateHashAlgorithm(string name)
        {
            switch (name)
            {
                case "md5": return MD5.Create();
                case "sha1": return SHA1.Create();
                case "sha256": return SHA256.Create();
                case "sha512": return SHA512.Create();
                default: throw new NotSupportedException($"Unsupported hash algorithm: {name}");
            }
        }
    }
}
#Requires -Modules PSParseHTML
# Downloads the latest Saxon-HE JAR files from Maven Central, verifies every download against
# the checksum files published next to it (and against its GPG signature when gpg is on the
# PATH), then moves the verified JARs into $env:LOCALAPPDATA\Programs\Saxonica.

$SaxonRepositoryUrl = 'https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE'
$MavenHashAlgorithms = 'md5', 'sha1', 'sha256', 'sha512'
$DestinationFolder = "$env:LOCALAPPDATA\Programs\Saxonica"

# gpg is optional: suppress the "command not found" error and skip signature checks if absent.
$GpgExec = Get-Command -Name gpg -All -CommandType Application -ErrorAction SilentlyContinue | Select-Object -First 1
if (-not $GpgExec) {
    Write-Warning -Message 'gpg was not found; GPG signatures will not be verified.'
}

# Compares the local file's digests with the '<RemoteUrl>.<algorithm>' checksum files for every
# algorithm in $MavenHashAlgorithms. Writes the differing values as warnings and throws on the
# first mismatch.
function Assert-MavenFileHash {
    param(
        [Parameter(Mandatory)][string]$RemoteUrl,
        [Parameter(Mandatory)][string]$LocalPath,
        [Parameter(Mandatory)][string]$DisplayName
    )
    foreach ($HashAlgorithm in $MavenHashAlgorithms) {
        $RemoteHash = Invoke-RestMethod -Uri "${RemoteUrl}.${HashAlgorithm}"
        # A checksum file may end in a newline or use the '<hash>  <file name>' layout;
        # keep only the first whitespace-delimited token.
        $RemoteHash = (([string]$RemoteHash).Trim() -split '\s+')[0]
        $LocalHash = Get-FileHash -LiteralPath $LocalPath -Algorithm $HashAlgorithm | Select-Object -ExpandProperty Hash
        if ($RemoteHash -ine $LocalHash) {
            Write-Warning -Message "'${DisplayName}' hashes do not match for ${HashAlgorithm}"
            Write-Warning -Message "Remote: ${RemoteHash}"
            Write-Warning -Message "Local: ${LocalHash}"
            throw "Hashes do not match"
        }
    }
}

# Download and verify maven-metadata.xml, then read the latest version from it
$MetadataUrl = "${SaxonRepositoryUrl}/maven-metadata.xml"
$MetadataPath = "$env:TEMP\maven-metadata.xml"
Invoke-RestMethod -Uri $MetadataUrl -Headers @{ Accept = 'application/xml' } -OutFile $MetadataPath
Assert-MavenFileHash -RemoteUrl $MetadataUrl -LocalPath $MetadataPath -DisplayName 'maven-metadata.xml'

[xml]$SaxonicaVersions = Get-Content -Path $MetadataPath
$LatestNode = $SaxonicaVersions.SelectSingleNode('/metadata/versioning/latest/text()')
if (-not $LatestNode) {
    throw "maven-metadata.xml does not contain a /metadata/versioning/latest value"
}
$LatestVersion = $LatestNode.InnerText

# List the files published for that version
$FileListing = Invoke-WebRequest -Uri "${SaxonRepositoryUrl}/${LatestVersion}/" -Headers @{ Accept = 'text/html,application/xhtml+xml,application/xml' } | ConvertFrom-HTML -Engine AngleSharp
$Links = $FileListing.QuerySelectorAll("#contents > a").InnerHtml

# Move-Item would rename the JAR to a file called 'Saxonica' if this folder did not exist yet.
New-Item -Path $DestinationFolder -ItemType Directory -Force | Out-Null

<#
    Each file has a GPG signature and hashes for each algorithm.
    Each signature also has hashes for each algorithm.
    For example, 'Saxon-HE-12.4.pom' has 'Saxon-HE-12.4.pom.asc',
    'Saxon-HE-12.4.pom.asc.md5', 'Saxon-HE-12.4.pom.asc.sha1',
    'Saxon-HE-12.4.pom.asc.sha256', 'Saxon-HE-12.4.pom.asc.sha512',
    'Saxon-HE-12.4.pom.md5', 'Saxon-HE-12.4.pom.sha1',
    'Saxon-HE-12.4.pom.sha256', 'Saxon-HE-12.4.pom.sha512'
    Hence, for every JAR we verify the signature hashes, then the file hashes,
    then the GPG signature, in that order.
#>
foreach ($Link in $Links | Where-Object -FilterScript { $_ -imatch '\.jar$' }) {
    $JarUrl = "${SaxonRepositoryUrl}/${LatestVersion}/${Link}"
    $JarPath = "$env:TEMP\$Link"
    $Signature = "${Link}.asc"
    $SignatureUrl = "${SaxonRepositoryUrl}/${LatestVersion}/${Signature}"
    $SignaturePath = "$env:TEMP\${Signature}"

    # Get the JAR file (no Content-Type header: a GET request carries no body to describe)
    Invoke-WebRequest -Uri $JarUrl -OutFile $JarPath
    # Get the signature
    Invoke-RestMethod -Uri $SignatureUrl -OutFile $SignaturePath

    # Check file hashes for the signature, then for the JAR
    Assert-MavenFileHash -RemoteUrl $SignatureUrl -LocalPath $SignaturePath -DisplayName $Signature
    Assert-MavenFileHash -RemoteUrl $JarUrl -LocalPath $JarPath -DisplayName $Link

    # Check the GPG signature
    if ($GpgExec) {
        # Alternative invocation that locates keys directly on keys.openpgp.org:
        # gpg --auto-key-locate hkps://keys.openpgp.org --keyserver hkps://keys.openpgp.org --keyserver-options auto-key-retrieve --verify "$env:TEMP\${Signature}" "$env:TEMP\$Link"
        & $GpgExec --auto-key-locate keyserver --keyserver hkps://keys.openpgp.org --keyserver-options auto-key-retrieve --verify $SignaturePath $JarPath
        if ($LASTEXITCODE -ne 0) {
            throw "GPG verification failed"
        }
    }

    # Move the JAR file to $env:LOCALAPPDATA\Programs\Saxonica
    Move-Item -LiteralPath $JarPath -Destination $DestinationFolder -Force
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment