Last active
December 21, 2020 17:51
-
-
Save bryanvine/e7488023edd1f971b4d47db1ad4ecd31 to your computer and use it in GitHub Desktop.
PowerShell Script - MassDownloader - Efficient, Automated, Fault Tolerant, Idempotent downloader with real-time metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
BV-MassDownloader.ps1
https://www.bryanvine.com/2019/05/powershell-script-massdownloader.html
Author: Bryan Vine
Last updated: 05/12/2019
Description: This function leverages the BITS service for downloading many files from a source file with URLs, one per line.
The function multithreads, runs in the background asynchronously, can add more files to the queue in real time,
is idempotent, fault tolerant, and can resume partially downloaded files.
#>
#Requires -version 3 | |
Function Get-DownloadFiles{
    <#
    .SYNOPSIS
        Downloads all the URLs listed in the source download list and saves them in the target destination.
    .DESCRIPTION
        This function leverages the BITS service for downloading many files from a source file with URLs, one per line.
        Jobs are started asynchronously, at most -maxconcurrent are kept in the queue, and the source list is
        re-read on every pass so URLs can be appended while the function is running.
        Entries whose target file already exists, or whose URL is already in the BITS queue, are skipped,
        so the function can be stopped and re-run against the same list.
    .PARAMETER source
        Path of the text file to read the download URLs from, one per line.
    .PARAMETER target
        Directory the files will be downloaded to.
    .PARAMETER extension
        File extension that's being used to find the file name in the download URLs.
        If no part of a URL contains it, the last path segment of the URL is used as the file name.
    .PARAMETER maxconcurrent
        Max concurrent downloads
    .EXAMPLE
        Get-DownloadFiles -source c:\sourceURLlist.txt -target d:\downloads -extension mp3
        This will download all the URL links in c:\sourceURLlist.txt, parse out the mp3 files and save them to d:\downloads
    .LINK
        https://www.bryanvine.com/2019/05/powershell-script-massdownloader.html
    .LINK
        Start-BitsTransfer
    .NOTES
        Author: Bryan Vine
        Last updated: 05/12/2019
        Get-BitsTransfer is called without a filter, so every job it returns is treated as part of this queue
        (completed jobs are committed, errored jobs are removed).
    #>
    [cmdletbinding()]
    Param(
        [string]$source = "c:\temp\downloadlist.txt",
        [string]$target = "C:\Temp\videos",
        [string]$extension = "mp4",
        [int]$maxconcurrent = 10
    )
    BEGIN{
        Import-Module BitsTransfer
        #Loads list, removes duplicates & blank lines.
        #@() keeps a one-line file an array; otherwise $list[$index] would index into the string itself
        $list = @((gc $source) | select -unique | ?{$_ -notlike $null})
        #initialize working variables (the totals are read by the progress math before the first metrics pass)
        $index = 0
        $Errors = 0
        $transfering = @()
        $totalspeed = 0
        $totalsize = 0
        $totalcomplete = 0
    }
    PROCESS{
        while($index -lt $list.Count -or $transfering.Count -gt 0){
            #get currently transferring jobs (always as an array, even for a single job)
            $transfering = @(Get-BitsTransfer | select -ExpandProperty filelist | select -ExpandProperty remotename)

            #Calculate Progress activity for realtime display
            $remaining = $list.Count - $Errors
            #never divide by zero, e.g. when every entry of the list has errored out
            $slots = [math]::Max(1, [math]::Min($maxconcurrent, $remaining))
            $averagesize = $totalsize / $slots
            $estimatedtotal = $averagesize * $remaining
            if($estimatedtotal -le 0){$estimatedtotal = 1}
            $estimatedremaining = $estimatedtotal - $totalcomplete - ($index - $transfering.Count - $Errors) * $averagesize
            $estimatedpercent = (($estimatedtotal - $estimatedremaining) / $estimatedtotal) * 100
            #Write-Progress only accepts 0-100
            $estimatedpercent = [math]::Max(0, [math]::Min(100, $estimatedpercent))
            Write-Progress -Activity "Downloading files from list: $source - saving to $target" -PercentComplete $estimatedpercent -Status (
                "Files downloaded: $($index - $transfering.count - $errors)/$($list.count) - " +
                "Queue: $($transfering.count)/$maxconcurrent - " +
                "Errors: $Errors - " +
                "Total Download Speed: $totalspeed MB/sec - " +
                "Percent: $([math]::Round($estimatedpercent,0)) %")

            #fix index pointer for idempotency: resume after the last URL that is already queued
            if($index -eq 0 -and $transfering.Count -gt 0){
                $index = [array]::IndexOf($list, $transfering[-1]) + 1
            }

            #While the queue has room, walk the list: skip entries that need no work and start at most one job per pass.
            #The index advances for skipped entries too, otherwise an existing file would stall the loop forever.
            while($index -lt $list.Count -and $transfering.Count -lt $maxconcurrent){
                $url = $list[$index++]
                #Extract's file name from the URL
                $filename = $url.Split("/").Split("?") | ?{$_ -like "*$extension*"} | ?{$_.Length -gt 3} | select -Last 1
                if(!$filename){
                    #no segment carries the extension: fall back to the last path segment, ignoring the query string
                    $filename = $url.Split("?")[0].TrimEnd("/").Split("/")[-1]
                }
                #Check if target file was already downloaded (-LiteralPath so [ ] in names are not wildcards)
                if(Test-Path -LiteralPath "$target\$filename"){ continue }
                #Idempotency if script is stopped and re-ran while queue is still downloading
                if($url -in $transfering){ continue }
                #Add new download job
                Start-BitsTransfer -Source $url -Destination "$target\$filename" -Asynchronous | Out-Null
                Start-Sleep -Seconds 1
                break
            }

            #Check for completed download jobs, remove from queue
            Get-BitsTransfer | ?{$_.jobstate -like "transferred"} | Complete-BitsTransfer
            #Check for errored download jobs, remove from queue
            Get-BitsTransfer | ?{$_.jobstate -like "*Error*"} | %{
                $Errors++
                Write-Host -ForegroundColor DarkRed -BackgroundColor Black "Error Downloading: $($_.filelist.RemoteName)"
                $_ | Remove-BitsTransfer
            }

            #Per loop variable initialization
            $totalspeed = 0
            $totalsize = 1
            $totalcomplete = 0
            $id = 1
            #Per file metrics calculation & display
            Get-BitsTransfer | ?{$_.jobstate -like "Transferring"} | %{
                $totalcomplete += $_.BytesTransferred
                $totalsize += $_.BytesTotal
                #size may not be usable yet; avoid dividing by zero and keep the bar within 0-100
                $PercentComplete = 0
                if($_.BytesTotal -gt 0){
                    $PercentComplete = [math]::Max(0, [math]::Min(100, [math]::Round(($_.BytesTransferred * 100 / $_.BytesTotal),0)))
                }
                #a job created and modified in the same instant has no measurable elapsed time
                $elapsed = ($_.ModificationTime - $_.CreationTime).TotalSeconds
                $speed = 0
                if($elapsed -gt 0){
                    $speed = [math]::Round(($_.BytesTransferred/1MB) / $elapsed,3)
                }
                $totalspeed += $speed
                #no speed yet means no estimate; also clamp so AddSeconds/FromSeconds cannot overflow
                $timeleft = 0
                if($speed -gt 0){
                    $secondsleft = (($_.BytesTotal - $_.BytesTransferred)/1MB) / $speed
                    $timeleft = [math]::Round([math]::Max(0, [math]::Min([int]::MaxValue, $secondsleft)),0)
                }
                $span = [timespan]::FromSeconds($timeleft)
                $ETA = get-date (get-date).AddSeconds($timeleft) -UFormat "%I:%M:%S %p"
                Write-Progress -id ($id++) -Activity "Downloading $(($_.filelist.LocalName).replace($target + '\',''))" -PercentComplete $PercentComplete -Status (
                    "Downloaded: $([math]::Round($_.BytesTransferred/1MB,0))/$([math]::Round($_.BytesTotal/1MB,0))MB - " +
                    "Percent: $PercentComplete % - Speed: $speed MB/sec - " +
                    #TotalHours so estimates beyond 24h are not truncated to the hour-of-day component
                    "Time left: $([math]::Floor($span.TotalHours)) hours $($span.Minutes) minutes $($span.Seconds) seconds - ETA: $ETA")
            }

            #Re-read file for new downloads
            $list = @((gc $source) | select -unique | ?{$_ -notlike $null})
            #loop delay
            Start-Sleep -Seconds 1
        }
    }
    END{
        Write-Host -ForegroundColor DarkGreen -BackgroundColor Black "Downloads complete."
    }
}
#Convenience function to cancel the whole download queue
Function Stop-DownloadFiles{
    <#
    .SYNOPSIS
        Removes every BITS transfer job returned by Get-BitsTransfer.
    #>
    Get-BitsTransfer | ForEach-Object {
        Remove-BitsTransfer -BitsJob $_
    }
}
#Start downloading with the default parameters as soon as the script is run
Get-DownloadFiles
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment