Last active
February 27, 2026 00:58
-
-
Save darianmiller/254cc8851d9759e22ac0a886bf7bae60 to your computer and use it in GitHub Desktop.
Invoke-WebArchiveSnapshot.ps1 [v1.1] A self-contained PowerShell script that submits URLs to the Wayback Machine and archive.ph simultaneously, resolves snapshot links, and outputs ready-to-paste markdown for your documentation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <# | |
| .SYNOPSIS | |
| Archive a URL to Wayback Machine and archive.ph and emit ready-to-paste Markdown. | |
| .DESCRIPTION | |
| Saves a snapshot of the given URL to two independent archiving services: | |
| - Wayback Machine (web.archive.org) -- documented save API, reliable | |
| - archive.ph -- plain GET submission, no official API | |
| Fetches the page title from the target URL and uses it as the link text | |
| in the generated markdown. Falls back to the URL if the title cannot be | |
| retrieved. | |
| Prints ready-to-paste markdown with both snapshot URLs for use in | |
| documentation link sections. | |
| Quality-of-life: | |
| - When -PassThru is not specified, the script emits the markdown to the | |
| pipeline (Write-Output) so callers can capture it. | |
| - To avoid "double print" (Write-Host + pipeline output), the markdown | |
| is NOT written via Write-Host when stdout is redirected, or when | |
| -NoHostMarkdown is specified. | |
| .PARAMETER Url | |
| The URL to archive. Must be an absolute http/https URL. | |
| .PARAMETER CopyToClipboard | |
| If specified, copies the generated markdown to the clipboard after printing. | |
| .PARAMETER PassThru | |
| If specified, outputs an object with resolved URLs and markdown instead of the plain markdown string (pipeline-friendly). | |
| .PARAMETER NoHostMarkdown | |
| If specified, does not print the markdown via Write-Host. The markdown is still | |
| emitted to the pipeline unless -PassThru is specified. | |
| .PARAMETER CI | |
| If specified, the script exits with a code equal to the number of archive | |
| submissions that failed (0 when both succeed). Also enabled automatically when | |
| the GITHUB_ACTIONS or CI environment variable is set to 'true'. | |
| .EXAMPLE | |
| pwsh ./Invoke-WebArchiveSnapshot.ps1 -Url 'https://example.com/some-post' | |
| .EXAMPLE | |
| # Capture the markdown output without double-print | |
| $md = pwsh ./Invoke-WebArchiveSnapshot.ps1 -Url 'https://example.com/some-post' -NoHostMarkdown | |
| .EXAMPLE | |
| pwsh ./Invoke-WebArchiveSnapshot.ps1 -Url 'https://example.com/some-post' -CopyToClipboard | |
| .EXAMPLE | |
| pwsh ./Invoke-WebArchiveSnapshot.ps1 -Url 'https://example.com/some-post' -CI | |
| .NOTES | |
| V1.1: 2026-02-26 | |
| Source: https://gist.github.com/darianmiller | |
| Wayback Machine uses a documented, stable save API. After submission, this | |
| script attempts to resolve a concrete snapshot URL using either the | |
| Content-Location header (best) or the availability API (closest snapshot). | |
| If a specific snapshot cannot be resolved quickly, the script provides a | |
| stable "search snapshots" URL as a fallback. | |
| Requires PowerShell 7+. Not tested on Windows PowerShell 5.1. | |
| #> | |
| [CmdletBinding()] | |
| param( | |
|     # Only -Url is required; every switch below defaults to off. | |
|     [Parameter(Mandatory)] | |
|     [string] $Url, | |
|     [switch] $CopyToClipboard, | |
|     [switch] $PassThru, | |
|     [switch] $NoHostMarkdown, | |
|     [switch] $CI | |
| ) | |
| # Strict mode + Stop: undefined variables/properties and cmdlet errors raise catchable errors. | |
| Set-StrictMode -Version Latest | |
| $ErrorActionPreference = 'Stop' | |
| # Keep progress output from the web cmdlets out of the console. | |
| $ProgressPreference = 'SilentlyContinue' | |
| # Shown in the banner; kept in sync with the V1.1 entry in .NOTES. | |
| $ScriptVersion = '1.1.0' | |
| function Write-Section { | |
|     # Writes a blank line, then the heading and a dashed underline of the same length (both cyan) to the host. | |
|     param([Parameter(Mandatory=$true)][string]$Title) | |
|     $underline = "-" * $Title.Length | |
|     Write-Host "" | |
|     foreach ($line in @($Title, $underline)) { | |
|         Write-Host $line -ForegroundColor Cyan | |
|     } | |
| } | |
| function Write-Result { | |
|     # Writes one "label value" row to the host: green when $Success is true, yellow otherwise. | |
|     # The label is left-aligned and padded to 12 characters. | |
|     param( | |
|         [Parameter(Mandatory=$true)][string]$Label, | |
|         [Parameter(Mandatory=$true)][string]$Value, | |
|         [Parameter(Mandatory=$false)][bool]$Success = $true | |
|     ) | |
|     $row = " {0,-12} {1}" -f $Label, $Value | |
|     if ($Success) { | |
|         Write-Host $row -ForegroundColor 'Green' | |
|     } else { | |
|         Write-Host $row -ForegroundColor 'Yellow' | |
|     } | |
| } | |
| function Get-FinalResponseUriString { | |
|     # Returns the final (post-redirect) URI of a web response as a string, or $null when it | |
|     # cannot be determined. | |
|     # Property existence is probed via PSObject.Properties because this script runs under | |
|     # Set-StrictMode -Version Latest: reading a property that does not exist (such as | |
|     # ResponseUri on an HttpResponseMessage) throws, which previously jumped straight to the | |
|     # catch block and skipped the RequestMessage lookup. | |
|     param([Parameter(Mandatory=$true)]$WebResponse) | |
|     try { | |
|         if ($null -eq $WebResponse.PSObject.Properties['BaseResponse']) { return $null } | |
|         $base = $WebResponse.BaseResponse | |
|         if ($null -eq $base) { return $null } | |
|         # Response types that expose the final URI directly. | |
|         if ($null -ne $base.PSObject.Properties['ResponseUri'] -and $null -ne $base.ResponseUri) { | |
|             return $base.ResponseUri.ToString() | |
|         } | |
|         # PS7+: .BaseResponse is HttpResponseMessage; the URI lives on RequestMessage. | |
|         if ($null -ne $base.PSObject.Properties['RequestMessage'] -and $null -ne $base.RequestMessage) { | |
|             $request = $base.RequestMessage | |
|             if ($null -ne $request.PSObject.Properties['RequestUri'] -and $null -ne $request.RequestUri) { | |
|                 return $request.RequestUri.ToString() | |
|             } | |
|         } | |
|     } catch { | |
|         # Best effort only: any unexpected response shape yields $null. | |
|     } | |
|     return $null | |
| } | |
| function Get-HeaderValue { | |
|     # Looks up header $Name in $Headers and returns it as a string (the first element when the | |
|     # stored value is an array). Returns $null when the header is absent or the lookup fails. | |
|     param( | |
|         [Parameter(Mandatory=$true)]$Headers, | |
|         [Parameter(Mandatory=$true)][string]$Name | |
|     ) | |
|     try { | |
|         if ($null -eq $Headers) { return $null } | |
|         if (-not $Headers.ContainsKey($Name)) { return $null } | |
|         $raw = $Headers[$Name] | |
|         if ($raw -is [System.Array]) { | |
|             return [string]$raw[0] | |
|         } else { | |
|             return [string]$raw | |
|         } | |
|     } catch { | |
|         # Lookup problems are non-fatal; fall through to $null. | |
|     } | |
|     return $null | |
| } | |
| function Resolve-ArchivePhUrlCandidate { | |
|     # Turns a candidate redirect target into a usable URL string: | |
|     #   - blank/null candidate        -> $null | |
|     #   - candidate starting with '/' -> resolved against https://archive.ph ($null if that fails) | |
|     #   - anything else               -> returned unchanged | |
|     param( | |
|         [AllowNull()] | |
|         [Parameter(Mandatory=$false)] | |
|         [string]$Candidate = '' | |
|     ) | |
|     if ([string]::IsNullOrWhiteSpace($Candidate)) { return $null } | |
|     if (-not $Candidate.StartsWith('/')) { return $Candidate } | |
|     try { | |
|         $archiveRoot = [Uri]'https://archive.ph' | |
|         $combined = [Uri]::new($archiveRoot, $Candidate) | |
|         return $combined.AbsoluteUri | |
|     } catch { | |
|         return $null | |
|     } | |
| } | |
| function Escape-MarkdownLinkText { | |
|     # Escapes text for use inside the "[...]" part of a Markdown link. | |
|     # Backslashes are escaped first: otherwise a backslash already present in the text | |
|     # (e.g. "foo\]") would combine with the escape added for the bracket ("foo\\]") and | |
|     # leave the bracket unescaped, terminating the link text early. | |
|     # Returns the escaped string. | |
|     param([Parameter(Mandatory=$true)][string]$Text) | |
|     $t = $Text.Replace('\', '\\') | |
|     $t = $t.Replace(']', '\]') | |
|     $t = $t.Replace('[', '\[') | |
|     return $t | |
| } | |
| function Normalize-Url { | |
|     # Trims the input, requires it to parse as an absolute http/https URI, and returns the | |
|     # parsed URI's AbsoluteUri string. Throws a descriptive message otherwise. | |
|     param([Parameter(Mandatory=$true)][string]$InputUrl) | |
|     $candidate = $InputUrl.Trim() | |
|     $parsed = $null | |
|     $isAbsolute = [Uri]::TryCreate($candidate, [UriKind]::Absolute, [ref]$parsed) | |
|     if (-not $isAbsolute) { | |
|         throw "Url must be an absolute URL including scheme (e.g., https://...). Value: $candidate" | |
|     } | |
|     $schemeOk = ($parsed.Scheme -eq 'http') -or ($parsed.Scheme -eq 'https') | |
|     if (-not $schemeOk) { | |
|         throw "Url must be http or https. Value: $candidate" | |
|     } | |
|     return $parsed.AbsoluteUri | |
| } | |
| function Should-HostMarkdown { | |
|     # Decides whether the markdown should also be printed with Write-Host. | |
|     # Returns $false when -NoHostMarkdown is given (an explicit opt-out always wins, matching | |
|     # the parameter's help text), $true for -PassThru, and otherwise $true unless stdout is redirected. | |
|     param( | |
|         [Parameter(Mandatory=$true)][switch]$NoHostMarkdown, | |
|         [Parameter(Mandatory=$true)][switch]$PassThru | |
|     ) | |
|     # Explicit opt-out is honored first, even in combination with -PassThru. | |
|     if ($NoHostMarkdown) { return $false } | |
|     if ($PassThru) { | |
|         # PassThru returns an object; host markdown is useful and does not duplicate pipeline text. | |
|         return $true | |
|     } | |
|     # If stdout is redirected (file, pipe to another process, CI capture), avoid Write-Host to prevent duplication. | |
|     try { | |
|         if ([Console]::IsOutputRedirected) { return $false } | |
|     } catch { | |
|         # ignore and default to host output | |
|     } | |
|     return $true | |
| } | |
| # ----------------------------------------------------------------------- | |
| # Validate the URL argument, then print the tool banner | |
| # ----------------------------------------------------------------------- | |
| $Url = Normalize-Url -InputUrl $Url | |
| $bannerTitle = "IdeasAwakened: Archive Snapshot Tool - v{0}" -f $ScriptVersion | |
| $bannerRule = "------------------------------------------------" | |
| Write-Host "" | |
| foreach ($bannerLine in @($bannerTitle, $bannerRule)) { | |
|     Write-Host $bannerLine -ForegroundColor Cyan | |
| } | |
| Write-Host (" URL: {0}" -f $Url) | |
| # ----------------------------------------------------------------------- | |
| # Fetch page title for use in generated markdown | |
| # ----------------------------------------------------------------------- | |
| # $pageTitle stays $null when the page cannot be fetched or has no usable <title>; | |
| # the markdown section then falls back to the URL as link text. | |
| $pageTitle = $null | |
| try { | |
|     $titleResponse = Invoke-WebRequest -Uri $Url -TimeoutSec 15 -MaximumRedirection 5 | |
|     # Tolerant title extraction: multiline + case-insensitive. | |
|     $m = [regex]::Match($titleResponse.Content, '(?is)<title[^>]*>\s*(.*?)\s*</title>') | |
|     if ($m.Success) { | |
|         $decodedTitle = [System.Net.WebUtility]::HtmlDecode($m.Groups[1].Value) | |
|         # The pattern can span lines, so collapse every whitespace run (including newlines) | |
|         # to a single space; a raw line break would otherwise end up inside the link text. | |
|         $cleanTitle = ($decodedTitle -replace '\s+', ' ').Trim() | |
|         if (-not [string]::IsNullOrWhiteSpace($cleanTitle)) { | |
|             $pageTitle = $cleanTitle | |
|             Write-Host (" Title: {0}" -f $pageTitle) -ForegroundColor DarkGray | |
|         } | |
|     } | |
| } catch { | |
|     # Title lookup is best effort; archiving continues without it. | |
|     Write-Host " Could not fetch page title. Using URL as link text." -ForegroundColor DarkGray | |
| } | |
| # ----------------------------------------------------------------------- | |
| # Wayback Machine | |
| # ----------------------------------------------------------------------- | |
| # Submits the URL to the save endpoint, then tries to resolve a concrete snapshot URL: | |
| #   1. from the Content-Location response header, else | |
| #   2. by polling the availability API for the closest snapshot (up to 30s), else | |
| #   3. falls back to the "all snapshots" search URL. | |
| # Any failure of the submission itself is reported and recorded as 'Failed'. | |
| Write-Section "Wayback Machine (web.archive.org)" | |
| $waybackSnapshotUrl = $null # concrete snapshot if known | |
| $waybackFallbackUrl = "https://web.archive.org/web/*/$Url" # stable fallback (search) | |
| $waybackSaveUrl = "https://web.archive.org/save?url=$([Uri]::EscapeDataString($Url))" | |
| $waybackResolvedKind = $null # Content-Location | Closest | Search | Failed | |
| try { | |
|     Write-Host " Submitting..." -ForegroundColor DarkGray | |
|     $response = Invoke-WebRequest -Uri $waybackSaveUrl -Method GET -TimeoutSec 30 | |
|     # Snapshot URL may come back immediately in Content-Location header | |
|     $contentLocation = Get-HeaderValue -Headers $response.Headers -Name 'Content-Location' | |
|     if (-not [string]::IsNullOrWhiteSpace($contentLocation)) { | |
|         # The header may be a path or (defensively) an absolute URL; only prefix the host for | |
|         # paths, and tolerate a missing leading slash so the result is always well-formed. | |
|         if ($contentLocation -match '^https?://') { | |
|             $waybackSnapshotUrl = $contentLocation | |
|         } else { | |
|             $waybackSnapshotUrl = "https://web.archive.org/" + $contentLocation.TrimStart('/') | |
|         } | |
|         $waybackResolvedKind = 'Content-Location' | |
|         Write-Result -Label "Snapshot:" -Value $waybackSnapshotUrl -Success $true | |
|     } else { | |
|         # Poll the availability API to get a resolvable "closest" snapshot URL. | |
|         # Note: "closest" may be older than the one just submitted; treat as best-effort. | |
|         Write-Host " Snapshot queued. Resolving closest snapshot (up to 30s)..." -ForegroundColor DarkGray | |
|         $availabilityUri = "https://archive.org/wayback/available?url=$([Uri]::EscapeDataString($Url))" | |
|         $deadline = (Get-Date).AddSeconds(30) | |
|         while ((Get-Date) -lt $deadline) { | |
|             Start-Sleep -Seconds 3 | |
|             try { | |
|                 $availResponse = Invoke-WebRequest -Uri $availabilityUri -TimeoutSec 10 | |
|                 $availData = $availResponse.Content | ConvertFrom-Json | |
|                 # Probe properties explicitly: under strict mode a missing "closest" | |
|                 # (no snapshot yet) would otherwise surface as an exception. | |
|                 $closest = $null | |
|                 $snapshotsProp = $availData.PSObject.Properties['archived_snapshots'] | |
|                 if ($null -ne $snapshotsProp -and $null -ne $snapshotsProp.Value) { | |
|                     $closestProp = $snapshotsProp.Value.PSObject.Properties['closest'] | |
|                     if ($null -ne $closestProp) { $closest = $closestProp.Value } | |
|                 } | |
|                 if ($null -ne $closest -and $closest.available -eq $true -and -not [string]::IsNullOrWhiteSpace($closest.url)) { | |
|                     $waybackSnapshotUrl = ($closest.url -replace '^http://', 'https://') | |
|                     $waybackResolvedKind = 'Closest' | |
|                     Write-Result -Label "Closest:" -Value $waybackSnapshotUrl -Success $true | |
|                     break | |
|                 } | |
|             } catch { | |
|                 # Non-fatal polling failure; keep trying until deadline. | |
|             } | |
|         } | |
|         if ($null -eq $waybackSnapshotUrl) { | |
|             $waybackResolvedKind = 'Search' | |
|             Write-Result -Label "Search:" -Value $waybackFallbackUrl -Success $true | |
|             Write-Host " Could not resolve a concrete snapshot URL within 30s." -ForegroundColor DarkGray | |
|             Write-Host " Using search URL which lists all snapshots for this page." -ForegroundColor DarkGray | |
|         } else { | |
|             # Also print the stable fallback search URL (useful when closest is not the new snapshot) | |
|             Write-Result -Label "Search:" -Value $waybackFallbackUrl -Success $true | |
|         } | |
|     } | |
| } catch { | |
|     $waybackResolvedKind = 'Failed' | |
|     Write-Result -Label "Failed:" -Value $_.Exception.Message -Success $false | |
|     Write-Result -Label "Manual:" -Value $waybackSaveUrl -Success $false | |
| } | |
| # ----------------------------------------------------------------------- | |
| # archive.ph | |
| # ----------------------------------------------------------------------- | |
| # Submits the URL with a plain GET and inspects where the request ended up. | |
| # A final URL of the form https://archive.ph/<id> is reported as a snapshot; any other | |
| # archive.ph URL is reported as "Processing". Failures print a manual submission URL. | |
| Write-Section "archive.ph" | |
| $archivePhSnapshotUrl = $null # best effort (may be processing page) | |
| $archivePhManualUrl = "https://archive.ph/?url=$([Uri]::EscapeDataString($Url))" | |
| try { | |
|     $submitUri = "https://archive.ph/submit/?url=$([Uri]::EscapeDataString($Url))" | |
|     Write-Host " Submitting..." -ForegroundColor DarkGray | |
|     $submitResponse = Invoke-WebRequest -Uri $submitUri -Method GET -TimeoutSec 60 -MaximumRedirection 5 | |
|     # Try multiple sources for the "final" URL, including Location header. | |
|     $candidateFinal = Get-FinalResponseUriString -WebResponse $submitResponse | |
|     $candidateLoc = Get-HeaderValue -Headers $submitResponse.Headers -Name 'Location' | |
|     $finalUrl = $null | |
|     $finalUri = $null | |
|     foreach ($c in @($candidateFinal, $candidateLoc)) { | |
|         $r = Resolve-ArchivePhUrlCandidate -Candidate $c | |
|         if ([string]::IsNullOrWhiteSpace($r)) { continue } | |
|         # Check the parsed host rather than searching the whole string, so a URL that merely | |
|         # mentions "archive.ph" in its path or query is not mistaken for an archive.ph page. | |
|         $parsed = $null | |
|         if (-not [Uri]::TryCreate($r, [UriKind]::Absolute, [ref]$parsed)) { continue } | |
|         if ($parsed.Host -eq 'archive.ph' -or $parsed.Host.EndsWith('.archive.ph')) { | |
|             $finalUrl = $r | |
|             $finalUri = $parsed | |
|             break | |
|         } | |
|     } | |
|     if ([string]::IsNullOrWhiteSpace($finalUrl)) { | |
|         throw "Could not determine final redirect URL from archive.ph response." | |
|     } | |
|     # Every accepted candidate is on archive.ph, so only two outcomes remain: | |
|     # a short-id snapshot URL, or some other archive.ph page (treated as still processing). | |
|     $archivePhSnapshotUrl = $finalUrl | |
|     $isSnapshot = ($finalUri.AbsolutePath -match '^/[a-zA-Z0-9]+$') -and | |
|         [string]::IsNullOrEmpty($finalUri.Query) -and | |
|         [string]::IsNullOrEmpty($finalUri.Fragment) | |
|     if ($isSnapshot) { | |
|         Write-Result -Label "Snapshot:" -Value $archivePhSnapshotUrl -Success $true | |
|     } else { | |
|         Write-Result -Label "Processing:" -Value $archivePhSnapshotUrl -Success $true | |
|         Write-Host " Page may still be processing. Check the URL above for the final snapshot." -ForegroundColor DarkGray | |
|     } | |
| } catch { | |
|     $errMsg = $_.Exception.Message | |
|     Write-Result -Label "Failed:" -Value $errMsg -Success $false | |
|     Write-Result -Label "Manual:" -Value $archivePhManualUrl -Success $false | |
|     # Rate limiting is detected from the status code appearing in the error message. | |
|     if ($errMsg -match '429') { | |
|         Write-Host " Rate limited. Wait a few minutes and try again, or use the manual URL above." -ForegroundColor DarkGray | |
|     } | |
| } | |
| # ----------------------------------------------------------------------- | |
| # Markdown | |
| # ----------------------------------------------------------------------- | |
| Write-Section "Markdown for documentation" | |
| # Link text: page title when one was found, otherwise the URL itself. | |
| $linkTextRaw = $Url | |
| if ($pageTitle) { $linkTextRaw = $pageTitle } | |
| $linkText = Escape-MarkdownLinkText -Text $linkTextRaw | |
| # Prefer resolved snapshot URLs; otherwise use the search / manual-submission URLs. | |
| $waybackLink = $waybackFallbackUrl | |
| if ($waybackSnapshotUrl) { $waybackLink = $waybackSnapshotUrl } | |
| $archiveLink = $archivePhManualUrl | |
| if ($archivePhSnapshotUrl) { $archiveLink = $archivePhSnapshotUrl } | |
| $markdown = @" | |
| - [$linkText]($Url) | |
| - [Wayback Machine snapshot]($waybackLink) | |
| - [archive.ph snapshot]($archiveLink) | |
| "@ | |
| # Print to the host only when that would not duplicate the pipeline output. | |
| $hostMarkdown = Should-HostMarkdown -NoHostMarkdown:$NoHostMarkdown -PassThru:$PassThru | |
| if ($hostMarkdown) { Write-Host $markdown -ForegroundColor White } | |
| if ($CopyToClipboard) { | |
|     # Clipboard access can fail (for example where no clipboard utility is available). | |
|     # With $ErrorActionPreference = 'Stop' that would abort the script before the markdown | |
|     # is emitted and before the CI exit code is set, so report the problem and continue. | |
|     try { | |
|         $markdown | Set-Clipboard | |
|         Write-Host " Markdown copied to clipboard." -ForegroundColor Green | |
|     } catch { | |
|         Write-Host (" Could not copy markdown to clipboard: {0}" -f $_.Exception.Message) -ForegroundColor Yellow | |
|     } | |
| } | |
| Write-Host "" | |
| # Summary of everything resolved during this run; emitted only with -PassThru. | |
| $resultProperties = [ordered]@{ | |
|     Url                = $Url | |
|     Title              = $pageTitle | |
|     MarkdownLinkText   = $linkTextRaw | |
|     WaybackSnapshotUrl = $waybackSnapshotUrl | |
|     WaybackKind        = $waybackResolvedKind | |
|     WaybackSearchUrl   = $waybackFallbackUrl | |
|     ArchivePhUrl       = $archivePhSnapshotUrl | |
|     ArchivePhManualUrl = $archivePhManualUrl | |
|     Markdown           = $markdown | |
| } | |
| $resultObject = [pscustomobject]$resultProperties | |
| if (-not $PassThru) { | |
|     # Default: the markdown string goes to the pipeline so callers can capture it. | |
|     Write-Output $markdown | |
| } else { | |
|     $resultObject | |
| } | |
| # CI mode: requested with -CI, or detected from GITHUB_ACTIONS / CI being 'true'. | |
| # In CI mode the process exit code is the number of archive services that failed. | |
| $IsCiEnv = [bool]$CI -or ($env:GITHUB_ACTIONS -eq 'true') -or ($env:CI -eq 'true') | |
| if ($IsCiEnv) { | |
|     $waybackFailed = ($waybackResolvedKind -eq 'Failed') -or [string]::IsNullOrWhiteSpace($waybackLink) | |
|     $archivePhFailed = ($null -eq $archivePhSnapshotUrl) | |
|     $failCount = [int]$waybackFailed + [int]$archivePhFailed | |
|     exit $failCount | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment