Skip to content

Instantly share code, notes, and snippets.

@proxb

proxb/HadoopChallenge.ps1

Last active May 24, 2020
Embed
What would you like to do?
# Tallies chess game outcomes (the [Result "..."] tags) found in the .pgn files
# of the first 10 sub-directories of .\ChessData-master. Files are parsed in
# parallel on a runspace pool (1-10 runspaces); Measure-Command reports the
# elapsed time and the totals are emitted afterwards as a single object.
Measure-Command {
    # Running totals, pre-seeded with the three decisive/drawn outcomes so they
    # always appear in the output even when their count is zero.
    $Hash = @{
        '0-1'     = 0
        '1-0'     = 0
        '1/2-1/2' = 0
    }

    # Worker executed inside a pooled runspace.
    #   $File - one path, or an array of paths, of .pgn files to scan.
    # Returns a hashtable mapping each result string to its occurrence count.
    $ScriptBlock = {
        Param ($File)
        $tempHash = @{}
        ForEach ($Item in $File) {
            $Reader = $null
            Try {
                # 512KB read buffer; BOM detection enabled, UTF-8 otherwise.
                $Reader = [activator]::CreateInstance([System.IO.StreamReader],@($Item, [System.Text.Encoding]::UTF8,$True,524288))
                $RawFile = $Reader.ReadToEnd()
            }
            Catch {
                # Skip unreadable files instead of re-counting the previous
                # file's text that would otherwise still be in $RawFile.
                Write-Error ('Unable to read {0}: {1}' -f $Item, $_.Exception.Message)
                Continue
            }
            Finally {
                # Release the file handle deterministically.
                If ($null -ne $Reader) {
                    $Reader.Dispose()
                }
            }
            ForEach ($Group in ([regex]::Matches($RawFile,'\[Result \"(.*)\"'))) {
                $tempHash[$Group.groups[1].Value]++
            }
        }
        return $tempHash
    }

    $runspacepool = $null
    $runspaces = New-Object System.Collections.ArrayList
    Try {
        $sessionstate = [system.management.automation.runspaces.initialsessionstate]::CreateDefault()
        $runspacepool = [runspacefactory]::CreateRunspacePool(1, 10, $sessionstate, $Host)
        $runspacepool.Open()

        # 'Split' yields two collections: [0] = files larger than 5MB,
        # [1] = all remaining files. @() keeps .where() callable when the
        # pipeline returns nothing.
        $Files = @(Get-ChildItem ChessData-master -Directory | Select-Object -First 10 | Get-ChildItem -Filter *.pgn -File).where({$_.length -gt 5MB},'Split')

        # Build the work list: the small files are handled together by one
        # worker, every large file gets a worker of its own.
        $WorkItems = New-Object System.Collections.ArrayList
        If ($Files[1].Count -gt 0) {
            [void]$WorkItems.Add(@($Files[1].fullname))
        }
        ForEach ($Item in $Files[0]) {
            [void]$WorkItems.Add($Item.fullname)
        }

        ForEach ($Work in $WorkItems) {
            # Create the powershell instance and supply the scriptblock with its argument
            $powershell = [powershell]::Create().AddScript($ScriptBlock).AddArgument($Work)
            # Bind the instance to the pool so it runs on a pooled runspace
            $powershell.RunspacePool = $runspacepool
            # Keep the async handle returned by BeginInvoke(); EndInvoke() needs it later
            [void]$runspaces.Add(([pscustomobject]@{
                PowerShell = $powershell
                Runspace   = $powershell.BeginInvoke()
            }))
        }

        ForEach ($Item in $runspaces) {
            Try {
                # EndInvoke() blocks until this worker has finished, so no
                # polling loop is required.
                $Results = $Item.PowerShell.EndInvoke($Item.Runspace)
            }
            Catch {
                # One failed worker must not discard the other workers' counts.
                Write-Warning "Worker failed: $($_.Exception.Message)"
                Continue
            }
            # Surface per-file problems the worker reported on its error stream.
            ForEach ($Problem in $Item.PowerShell.Streams.Error) {
                Write-Warning "Worker reported: $Problem"
            }
            ForEach ($Result in $Results) {
                If ($null -eq $Result) {
                    Continue
                }
                ForEach ($Entry in $Result.GetEnumerator()) {
                    # '*' marks a game without a recorded outcome; leave it out.
                    If ($Entry.Name -ne '*') {
                        $Hash[$Entry.Name] += $Entry.Value
                    }
                }
            }
        }
    }
    Finally {
        # Always release the PowerShell instances and the pool, even on failure.
        ForEach ($Item in $runspaces) {
            $Item.PowerShell.Dispose()
        }
        If ($null -ne $runspacepool) {
            $runspacepool.Close()
            $runspacepool.Dispose()
        }
    }
}
[pscustomobject]$Hash
@proxb

This comment has been minimized.

Copy link
Owner Author

@proxb proxb commented Mar 5, 2016

Obligatory screenshot of time:
hadoopchallenge1

EDIT: A Twitter user ran my code and clocked 31 seconds with it, so that is cool.

@proxb

This comment has been minimized.

Copy link
Owner Author

@proxb proxb commented Mar 6, 2016

The latest update shaves about 12 seconds off the run.
hadoopchallenge2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment