Last active
June 8, 2023 01:37
-
-
Save mklement0/e8cabb620342af37ae7d0faecba7d588 to your computer and use it in GitHub Desktop.
PowerShell benchmarks for specific Stack Overflow questions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmarks for the answers at https://stackoverflow.com/q/75174855/45375.

# Specify how many sample data rows to use (use multiples of 10,
# because the 10 static sample rows below are replicated as a unit).
$totalRowCount = 1000

# How many runs to average.
# Note: With values above 15 you'll start to see the effects of on-demand compilation.
$runCount = 10
# Download and define function `Time-Command` on demand (will prompt).
# To be safe, inspect the source code at the specified URL first.
if (-not (Get-Command -ErrorAction Ignore Time-Command)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  # Require explicit consent before executing downloaded code.
  if ((Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?").Trim() -notin 'y', 'yes') { Write-Warning 'Aborted.'; exit 2 }
  # Evaluate the downloaded source in the current scope; 3>$null silences its warning stream.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function still isn't defined after the download.
  if (-not ${function:Time-Command}) { exit 2 }
}
# Create a temporary file with sample input data.
# These are the 10 static sample rows (CSV data lines without the header).
$rows = @(
  '"1011","01/16/2023","1/16/23 11:04 PM","1/17/23 6:52 AM"'
  '"1012","01/16/2023","1/16/23 11:18 PM","1/17/23 6:05 AM"'
  '"1012","01/17/2023","1/17/23 10:49 PM","1/18/23 7:26 AM"'
  '"1021","01/16/2023","1/16/23 11:18 PM","1/17/23 6:04 AM"'
  '"1021","01/17/2023","1/17/23 10:46 PM","1/18/23 8:12 AM"'
  '"10261","01/16/2023","1/16/23 6:02 AM","1/16/23 12:01 PM"'
  '"10262","01/18/2023","1/18/23 6:03 AM","1/18/23 12:02 PM"'
  '"10263","01/18/2023","1/18/23 11:04 AM","1/19/23 00:03 AM"'
  '"10262","01/19/2023","1/19/23 11:05 PM","1/20/23 07:03 AM"'
  '"1011","01/19/2023","1/19/23 2:06 PM","1/19/23 11:04 PM"'
)

# Multiplication factor for the static array of sample rows.
# Note: Total row count won't equal $totalRowCount exactly unless
#       $totalRowCount is a multiple of 10.
$multiplier = $totalRowCount / 10

# Save to file (header line followed by the replicated sample rows)...
Set-Content ($tempFile = New-TemporaryFile).FullName -Value '"Employee Id","Work Date","In","Out"', ($rows * $multiplier)

# ... and warm up the cache.
$null = Get-Content -Raw $tempFile
# Define script blocks with the commands to time.
# Both read the CSV in $tempFile and output per-hour counts of clocked-in employees.
$commands = @(
  { # @mklement0, from https://stackoverflow.com/a/75176143/45375
    # Maps a calendar day ([datetime] date) to a 24-element array of per-hour counts.
    $hourMap = [ordered] @{}
    Import-Csv $tempFile |
      ForEach-Object {
        # Get the in and out timestamps as [datetime] instances,
        # reset to the start of the hour.
        $in, $out =
          ($_.In, $_.Out).ForEach({ Get-Date $_ -Minute 0 -Second 0 -Millisecond 0 })
        # Loop over all hours in the time between in and out.
        $timestamp = $in
        while ($timestamp -le $out) {
          # For the timestamp's calendar day, create a 24-element array
          # representing the hours of the day; a given element's value will
          # receive the count of clocked-in employees for that hour.
          if (-not $hourMap.Contains($timestamp.Date)) {
            $hourMap[$timestamp.Date] = [int[]]::new(24)
          }
          $hourMap[$timestamp.Date][$timestamp.Hour]++
          $timestamp = $timestamp.AddHours(1)
        }
      }
    # Loop over all $hourMap entries, each representing a calendar day
    $hourMap.GetEnumerator() |
      ForEach-Object {
        foreach ($hour in 0..23) { # For each calendar day, loop over all hours.
          [pscustomobject] @{
            WorkDate = $_.Key.ToString('d')
            Hour     = $hour
            Count    = $_.Value[$hour]
          }
        }
      } |
      ConvertTo-Csv
  }
  { # @jdweng, from https://stackoverflow.com/a/75176209/45375
    # Reproduced as posted (formatting aside), so that the timing reflects the original answer.
    $table = Import-Csv $tempFile
    foreach ($row in $table) {
      $startDate = ([System.DateTime]::ParseExact($row.In, "M/d/yy h:mm tt", $null))
      #for count to work properly you need to truncate minutes
      $startDate = $startDate.AddMinutes(-$startDate.Minute)
      $row | Add-Member -NotePropertyName InDate -NotePropertyValue $startDate
      $row | Add-Member -NotePropertyName OutDate -NotePropertyValue ([System.DateTime]::ParseExact($row.Out, "M/d/yy h:mm tt", $null))
    }
    $minDate = $table | Measure-Object -Property InDate -Minimum
    $maxDate = $table | Measure-Object -Property OutDate -Maximum
    #subtract minutes to get hour
    $minDate = $minDate.minimum.AddMinutes(-$minDate.minimum.Minute)
    $maxDate = $maxDate.maximum.AddMinutes(-$maxDate.maximum.Minute)
    $out_table = [System.Collections.ArrayList]::new()
    # One output row per hour between the earliest in-time and the latest out-time.
    for ($date = $minDate; $date -le $maxDate; $date = $date.AddHours(1)) {
      $numEmployees = @($table | Where-Object { ($_.InDate -le $date) -and ($_.OutDate -ge $date) })
      $newRow = New-Object -TypeName psobject
      $newRow | Add-Member -NotePropertyName Hour -NotePropertyValue $date
      $newRow | Add-Member -NotePropertyName Count -NotePropertyValue $numEmployees.Count
      $out_table.Add($newRow) | Out-Null
    }
    $out_table
  }
)
Write-Verbose -Verbose "Running benchmarks with $totalRowCount sample rows, averaging $runCount runs..."

try {
  # Add -OutputToHost to print script-block output
  Time-Command -Count $runCount $commands
}
finally {
  # Clean up - in a `finally` block, so that the temporary file is removed
  # even if the benchmark run fails or is interrupted.
  $tempFile | Remove-Item
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmarks for the solutions at https://stackoverflow.com/q/75347681/45375

# Specify how many sample data lines to use (use multiples of 10,
# because the 10 static sample lines below are replicated as a unit).
$totalLineCount = 10000

# How many test runs to average.
# Note: With values above 15 you'll start to see the effects of on-demand compilation.
$runCount = 10
# Download and define function `Time-Command` on demand (will prompt).
# To be safe, inspect the source code at the specified URL first.
if (-not (Get-Command -ErrorAction Ignore Time-Command)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  # Require explicit consent before executing downloaded code.
  if ((Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?").Trim() -notin 'y', 'yes') { Write-Warning 'Aborted.'; exit 2 }
  # Evaluate the downloaded source in the current scope; 3>$null silences its warning stream.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function still isn't defined after the download.
  if (-not ${function:Time-Command}) { exit 2 }
}
# 10 sample lines, forming 5 pairs: a header line followed by an indented
# continuation line. The solutions below join each pair into a single line.
# NOTE(review): The continuation lines must start with whitespace, because the
#               -replace based solutions match them via '\s+'. The indentation
#               width used here (4 spaces) is a placeholder - confirm against
#               the original sample data.
$tenLines = @'
ALPHA-FETOPROTEIN ROUTINE CH 0203 001 02/03/2023@10:45 LIVERF3
    ###-##-#### #######,#### In lab
ALPHA-FETOPROTEIN ROUTINE CH 0203 234 02/03/2023@11:05 LIVER
    ###-##-#### ########,######## In lab
ANION GAP STAT CH 0203 124 02/03/2023@11:06 DAY
    ###-##-#### ######,##### #### In lab
BASIC METABOLIC PANE ROUTINE CH 0203 001 02/03/2023@10:45 LIVERF3
    ###-##-#### #######,#### ###### In lab
BASIC METABOLIC PANE ROUTINE CH 0204 002 02/03/2023@10:45 LIVERF4
    ###-##-#### #######,#### ###### In lab
'@ -split '\r?\n'

# Fill the sample input files with the requested number of lines.
Set-Content ($tempInFile = New-TemporaryFile).FullName -Value (, $tenLines * ($totalLineCount / 10))

# Obtain a sample output-file path.
$tempOutFile = New-TemporaryFile

# Warm up the cache.
$null = Get-Content -Raw $tempInFile
# Define the solutions to compare, as an array of script blocks.
# Each one reads $tempInFile, joins every pair of lines into one line
# (separated by a space, with the second line's leading whitespace removed),
# and writes the result to $tempOutFile.
$commands = @(
  { # switch statement, in pipeline (streaming)
    & {
      $i = 1
      switch -File $tempInFile {
        default {
          # Odd-numbered line: remember it; even-numbered line: emit the joined pair.
          if ($i++ % 2) { $firstLineInPair = $_ }
          else { $firstLineInPair + ' ' + $_.TrimStart() }
        }
      }
    } | Set-Content $tempOutFile
  }
  { # switch statement, no pipeline (collect all output lines first)
    Set-Content $tempOutFile -Value $(
      $i = 1
      switch -File $tempInFile {
        default {
          if ($i++ % 2) { $firstLineInPair = $_ }
          else { $firstLineInPair + ' ' + $_.TrimStart() }
        }
      })
  }
  { # -replace, in pipeline (streaming)
    (Get-Content -Raw $tempInFile) -replace '(.+)\r?\n(?:\s+)(.+\r?\n)', '$1 $2' |
      Set-Content $tempOutFile
  }
  { # -replace, no pipeline (collect all output lines first)
    Set-Content $tempOutFile -Value ((Get-Content -Raw $tempInFile) -replace '(.+)\r?\n(?:\s+)(.+\r?\n)', '$1 $2')
  }
  { # Get-Content -ReadCount 2, in pipeline, (streaming)
    Get-Content -ReadCount 2 $tempInFile |
      ForEach-Object { $_[0] + ' ' + $_[1].TrimStart() } |
      Set-Content $tempOutFile
  }
  { # Get-Content -ReadCount 2, no pipeline (collect all output lines first)
    Set-Content $tempOutFile -Value (
      (Get-Content -ReadCount 2 $tempInFile).ForEach({ $_[0] + ' ' + $_[1].TrimStart() })
    )
  }
  { # .NET APIs (StreamReader, StreamWriter)
    $reader = $writer = $null
    try {
      $reader = [System.IO.StreamReader]::new($tempInFile)
      $writer = [System.IO.StreamWriter]::new($tempOutFile)
      # $null is placed on the LHS so that the comparison is always a scalar null test.
      while ($null -ne ($line = $reader.ReadLine())) {
        $secondLine = ""
        if (!$reader.EndOfStream) { $secondLine = $reader.ReadLine() }
        $writer.WriteLine($line + ' ' + $secondLine.TrimStart())
      }
    }
    finally {
      # Always release the file handles, so that a failure doesn't leave the
      # files locked for the other solutions or for the final cleanup.
      if ($reader) { $reader.Close() }
      if ($writer) { $writer.Close() }
    }
  }
)
# Run the benchmarks.
Write-Verbose -Verbose "Running benchmarks ($($totalLineCount.ToString('N0')) input lines, averaged over $runCount runs)..."

try {
  # Add -OutputToHost to print script-block output
  Time-Command -Count $runCount $commands
}
finally {
  # Clean up - in a `finally` block, so that the temporary files are removed
  # even if the benchmark run fails or is interrupted.
  $tempInFile, $tempOutFile | Remove-Item
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmarks for a specific answer at https://stackoverflow.com/q/76011142/45375
# This Gist compares the performance of:
#  * @jdweng's solution at https://stackoverflow.com/a/76011282/45375
#  * with a PowerShell-idiomatic equivalent.
#
# On a Windows 11 22H2 machine running Windows PowerShell 5.1,
# the PowerShell-idiomatic solution is about 50(!) times faster, as
# the following sample output shows:
#
#   VERBOSE: Running benchmarks with 1000 input lines...
#
#   Factor Secs (15-run avg.) Command
#   ------ ------------------ -------
#   1.00   0.013              # The equivalent PowerShell-idiomatic solution...
#   48.68  0.635              # @jdweng's solution at https://stackoverflow.com/a/76011282/45375...
# Download and define function `Time-Command` on demand (will prompt).
# To be safe, inspect the source code at the specified URL first.
if (-not (Get-Command -ErrorAction Ignore Time-Command)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  # Require explicit consent before executing downloaded code.
  if ((Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?").Trim() -notin 'y', 'yes') { Write-Warning 'Aborted.'; exit 2 }
  # Evaluate the downloaded source in the current scope; 3>$null silences its warning stream.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function still isn't defined after the download.
  if (-not ${function:Time-Command}) { exit 2 }
}
# Construct the input data:

# Specify the number of input lines (strings).
$count = 1e3

# Construct sample strings: $count copies of the same string.
$inputStrings = (, 'a.b.c.d=rest') * $count

# Receives each solution's output, so that the results can be compared
# after the benchmark run.
$results = [ordered] @{
  List1 = $null
  List2 = $null
}
# The solutions to compare, specified as script blocks ({ ... })
# Each one turns every input string containing '=' into an object with
# properties column1 .. column4 and stores the resulting list in $results.
$commandsToCompare = @(
  { # @jdweng's solution at https://stackoverflow.com/a/76011282/45375
    # Reproduced as posted (formatting aside), so that the timing reflects the original answer.
    $results.List1 = $table = [System.Collections.ArrayList]::new()
    foreach ($row in $inputStrings) {
      if ($row.Contains('=')) {
        $newRow = New-Object -TypeName psobject
        $newRow | Add-Member -NotePropertyName column1 -NotePropertyValue 'abcdefg'
        $splitRow = $row.Split('=')
        $splitPeriod = $splitRow[0].Split('.')
        $newRow | Add-Member -NotePropertyName column2 -NotePropertyValue $splitPeriod[0]
        $newRow | Add-Member -NotePropertyName column3 -NotePropertyValue $splitPeriod[1]
        $newRow | Add-Member -NotePropertyName column4 -NotePropertyValue $splitRow[1]
        $table.Add($newRow) | Out-Null
      }
    }
  }
  { # The equivalent PowerShell-idiomatic solution
    # The foreach statement is used as an expression: its output objects are
    # collected directly, and each object is constructed in a single step.
    $results.List2 = foreach ($row in $inputStrings) {
      if ($row.Contains('=')) {
        $splitRow = $row.Split('=')
        $splitPeriod = $splitRow[0].Split('.')
        [pscustomobject] @{
          column1 = 'abcdefg'
          column2 = $splitPeriod[0]
          column3 = $splitPeriod[1]
          column4 = $splitRow[1]
        }
      }
    }
  }
)
# Run the benchmarks (15-run average).
Write-Verbose -Verbose "Running benchmarks with $count input lines..."
Time-Command $commandsToCompare

# Make sure that the commands yielded the same results:
# the two lists are compared by all property names of the first result object.
if ((Compare-Object $results.List1 $results.List2 -Property $results.List1[0].psobject.Properties.Name)) {
  throw "Test commands unexpectedly yielded different results."
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmarks for https://stackoverflow.com/q/76419365/45375

# Download and define function `Time-Command` on demand (will prompt).
# To be safe, inspect the source code at the specified URL first.
if (-not (Get-Command -ErrorAction Ignore Time-Command)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  # Require explicit consent before executing downloaded code.
  if ((Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?").Trim() -notin 'y', 'yes') { Write-Warning 'Aborted.'; exit 2 }
  # Evaluate the downloaded source in the current scope; 3>$null silences its warning stream.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function still isn't defined after the download.
  if (-not ${function:Time-Command}) { exit 2 }
}
# Specify how many sample data lines to use:
$totalLineCount = 1e6 # 1 million

# Create a temporary file with the requested number of - short - lines:
# each line contains its own 1-based line number.
$tempFile = New-TemporaryFile
1..$totalLineCount > $tempFile

# Warm up the cache:
$null = Get-Content -Raw $tempFile
# Run the benchmarks:
# Note:
#  * Averages 15 runs by default; change with -Count $n
#  * $n > 16 may distort the results due to JIT compilation.
# Each script block below is a different way of counting the lines in $tempFile.
try {
  Time-Command {
    # Lazy line enumeration, counted via LINQ.
    [System.Linq.Enumerable]::Count(
      [System.IO.File]::ReadLines((Convert-Path $tempFile))
    )
  }, {
    # Read all lines into an array, then take its length.
    [System.IO.File]::ReadAllLines((Convert-Path $tempFile)).Length
  }, {
    # External `wc` utility; NOTE(review): must be available on the PATH.
    wc -l $tempFile
  }, {
    # PowerShell's switch statement, incrementing a counter per line.
    $c = 0; switch -File $tempFile { default { ++$c } }; $c
  }
}
finally {
  # Clean up - in a `finally` block, so that the temporary file is removed
  # even if the benchmark run fails or is interrupted.
  Remove-Item $tempFile
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment