# Benchmark of PowerShell de-duplication cmdlets
#
# Benchmarking code used to produce results from blog post
# http://latkin.org/blog/2016/08/02/curious-behavior-when-de-duplicating-a-collection-in-powershell/
#
# Home-grown HashSet-based de-duplication.
#
# Emits each distinct input item the first time it is seen, preserving input
# order. Items are compared with the default equality of
# System.Collections.Generic.HashSet[Object].
#
# Parameters:
#   -InputObject  One or more items to de-duplicate (mandatory); accepted
#                 either as an argument or from the pipeline.
# Output:
#   Every item whose insertion into the set succeeded, i.e. first occurrences.
function hashunique {
    param(
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        [Object[]] $InputObject
    )

    begin {
        # Created once per invocation so duplicates are detected across all
        # pipeline items, not just within a single bound array.
        $seen = New-Object System.Collections.Generic.HashSet[Object]
    }

    process {
        foreach ($item in $InputObject) {
            # Add() returns $true only when the item was not already in the set.
            if ($seen.Add($item)) { $item }
        }
    }
}
# Benchmark driver: for 20 collection sizes (500, 1000, ..., 10000) build a
# collection of random integers, time each de-duplication approach on it, and
# emit one result object per size with the timings in milliseconds.
1..20 | ForEach-Object {
    $collectionSize = $_ * 500

    # Get-Random's -Maximum bound is exclusive, so values lie in
    # 1..($collectionSize/2 - 1): fewer distinct values than elements, which
    # guarantees that every collection contains duplicates.
    $data = 1..$collectionSize | ForEach-Object {
        Get-Random -Minimum 1 -Maximum ($collectionSize / 2)
    }

    # The measured pipelines are kept exactly as spelled in the column labels
    # below so that each label describes precisely what was timed.
    $selectTime = Measure-Command { $data | select -unique }
    $sortTime   = Measure-Command { $data | sort -unique }
    $groupTime  = Measure-Command { $data | group |% Name }
    $uniqTime   = Measure-Command { $data | hashunique }

    # One row per collection size; keys become the output column names.
    [PSCustomObject] @{
        'Collection Size' = $collectionSize
        'select -unique'  = $selectTime.TotalMilliseconds
        'sort -unique'    = $sortTime.TotalMilliseconds
        'group |% Name'   = $groupTime.TotalMilliseconds
        'hashunique'      = $uniqTime.TotalMilliseconds
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment