Last active
December 27, 2016 01:17
-
-
Save jessb321/c5b3c26c1471474ad6a595bca98d91d1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Functions
Function Download {
    <#
    .SYNOPSIS
    Downloads the latest dump and passes control to unzip.

    .DESCRIPTION
    Reads three variables from the calling scope: $url (address to fetch),
    $output (path the response is saved to) and $start_time (reference point
    for the elapsed-time message). When the download succeeds, the elapsed
    time is written and unzip is invoked.
    #>
    "Downloading the latest dump!"
    $wc = New-Object System.Net.WebClient
    try {
        $wc.Headers.add('Accept-Encoding', 'gzip, deflate, sdch')
        $wc.DownloadFile($url, $output)
    } finally {
        # WebClient is IDisposable; release it whether or not the download worked.
        # With the try/finally in place, a download exception propagates to the
        # caller instead of falling through to unzip with a missing/partial file.
        $wc.Dispose()
    }
    # TotalSeconds, not Seconds: Seconds is only the 0-59 component of the span,
    # so anything past one minute was previously under-reported.
    $elapsed = [math]::Round((Get-Date).Subtract($start_time).TotalSeconds)
    Write-Output "Time taken: $elapsed second(s)"
    "Finished downloading the latest dump!"
    unzip
}
Function DeGZip-File {
    <#
    .SYNOPSIS
    Decompresses a gzip file to disk.

    .PARAMETER infile
    Path of the gzip-compressed file to read. Defaults to D:\temp\systems.gz.

    .PARAMETER outfile
    Path of the decompressed file to create (an existing file is overwritten).
    Defaults to $infile with a trailing ".gz" removed.
    #>
    Param(
        $infile = "D:\temp\systems.gz",
        $outfile = ($infile -replace '\.gz$','')
    )
    # Named $source/$target rather than $input/$output: $input is a PowerShell
    # automatic variable and must not be used as an ordinary local.
    $source = $null
    $target = $null
    $gzipStream = $null
    try {
        # Open the input first so a missing archive fails before the output
        # file is created (FileMode.Create would truncate an existing file).
        $source = New-Object System.IO.FileStream $infile, ([IO.FileMode]::Open), ([IO.FileAccess]::Read), ([IO.FileShare]::Read)
        $gzipStream = New-Object System.IO.Compression.GzipStream $source, ([IO.Compression.CompressionMode]::Decompress)
        $target = New-Object System.IO.FileStream $outfile, ([IO.FileMode]::Create), ([IO.FileAccess]::Write), ([IO.FileShare]::None)
        $gzipStream.CopyTo($target)
    } finally {
        # Dispose whatever was actually opened, even when decompression fails.
        if ($null -ne $target) { $target.Dispose() }
        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
        if ($null -ne $source) { $source.Dispose() }
    }
}
Function unzip {
    <#
    .SYNOPSIS
    Decompresses the downloaded archive, deletes it, then runs CheckSame.

    .DESCRIPTION
    Reads $infile (archive path) and $outfile (destination path) from the
    calling scope and passes them to DeGZip-File.
    #>
    "Unzipping the latest dump, and removing the zipped file."
    DeGZip-File $infile $outfile
    # Remove the archive that was just extracted rather than a separately
    # hard-coded path, so the two cannot drift apart.
    Remove-Item -LiteralPath $infile
    CheckSame
}
Function CheckSame {
    <#
    .SYNOPSIS
    Runs processCSV unless the freshly extracted CSV matches the previous one.

    .DESCRIPTION
    Hashes D:\temp\systems.csv and compares it with $current_hash, which is
    read from the calling scope and may be unset when no earlier CSV existed.
    #>
    $new_hash = Get-FileHash -Path D:\temp\systems.csv
    # Compare the hash strings with -eq. The previous "=" was an assignment, so
    # the branch depended only on whether $current_hash was non-null, and the
    # file contents were never actually compared.
    $unchanged = ($null -ne $current_hash) -and
                 ($null -ne $new_hash) -and
                 ($new_hash.Hash -eq $current_hash.Hash)
    if ($unchanged) {
        "No need to update - Current file is the same as the old one."
    } else {
        processCSV
    }
}
Function processCSV {
    <#
    .SYNOPSIS
    Reduces systems.csv to the name/x/y/z columns, then splits and notifies.

    .DESCRIPTION
    Streams D:\temp\systems.csv through Select-Object into a semicolon-delimited
    D:\temp\systemsout.csv, then calls splitCSV followed by notifyDone.
    #>
    Write-Output "Processing CSV, then splitting it."

    $exportArgs = @{
        Path              = 'D:\temp\systemsout.csv'
        Delimiter         = ';'
        NoTypeInformation = $true
    }

    # A per-row Write-Progress was tried on the export and dropped because it
    # slowed the run down considerably.
    Import-Csv -Path 'D:\temp\systems.csv' |
        Select-Object -Property name, x, y, z |
        Export-Csv @exportArgs

    splitCSV
    notifyDone
}
Function notifyDone {
    <#
    .SYNOPSIS
    Announces completion: a BurntToast notification when that module is
    available, otherwise a plain text line.
    #>
    $toastModule = Get-Module -ListAvailable -Name BurntToast

    # No BurntToast available: fall back to plain output and stop here.
    if (-not $toastModule) {
        "Done! Finished!"
        return
    }

    Import-Module BurntToast
    New-BurntToastNotification -Text "Finished!", 'The CSV script is finished. Check your PS window for more info.'
}
Function splitCSV {
    <#
    .SYNOPSIS
    Splits D:\temp\systemsout.csv into numbered files of at most
    $linesperFile lines each.

    .DESCRIPTION
    Output files are named D:\temp\systemsSplit001.csv, systemsSplit002.csv, ...
    Lines are copied verbatim in order; progress messages are written to the
    output stream and the total duration is reported with Write-Host.
    #>
    $sw = new-object System.Diagnostics.Stopwatch
    $sw.Start()
    $filename = "D:\temp\systemsout.csv"
    $rootName = "D:\temp\systemsSplit"
    $ext = "csv"
    $linesperFile = 230000
    $filecount = 1
    # Both start as $null so the finally block can tell what was really opened.
    $reader = $null
    $writer = $null
    try {
        $reader = [io.file]::OpenText($filename)
        "Creating file number $filecount"
        $writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName,$filecount.ToString("000"),$ext))
        $filecount++
        $linecount = 0
        while ($reader.EndOfStream -ne $true) {
            "Reading $linesperFile"
            while (($linecount -lt $linesperFile) -and ($reader.EndOfStream -ne $true)) {
                $writer.WriteLine($reader.ReadLine())
                $linecount++
            }
            # Only roll over to a new file when there is more input, so no
            # empty trailing file is created.
            if ($reader.EndOfStream -ne $true) {
                "Closing file"
                $writer.Dispose()
                $writer = $null
                "Creating file number $filecount"
                $writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName,$filecount.ToString("000"),$ext))
                $filecount++
                $linecount = 0
            }
        }
    } finally {
        # Guard against $null: if opening the reader or a writer failed, calling
        # Dispose() on it would raise a second error that hides the real one.
        if ($null -ne $writer) { $writer.Dispose() }
        if ($null -ne $reader) { $reader.Dispose() }
    }
    $sw.Stop()
    Write-Host "Split complete in " $sw.Elapsed.TotalSeconds "seconds"
}
# Settings read by the functions above ($url, $output, $start_time, $infile,
# $outfile) plus the paths and existence flags used by the flow below.
$url = "https://eddb.io/archive/v5/systems.csv"
$output = "D:\temp\systems.gz"
$start_time = Get-Date
$ChkTemp = "D:\temp"
$ChkSystemsCSV = "D:\temp\systems.csv"
$ChkOutCSV = "D:\temp\systemsout.csv"
$ChkSplit = "D:\temp\systemsSplit*.csv"
$tempexists = Test-Path $ChkTemp
$systemsscvexists = Test-Path $ChkSystemsCSV
$systemsoutcsvexists = Test-Path $ChkOutCSV
$systemssplitexists = Test-Path $ChkSplit
$infile="D:\temp\systems.gz"
$outfile="D:\temp\systems.csv"
$timespan = New-TimeSpan -Days 1

# Entry point: decide which stage of the pipeline still needs to run.
"Starting at $start_time"
if (-not $tempexists) {
    # No working directory yet: create it and start from a fresh download.
    New-Item -ItemType Directory -Path D:\temp
    Download
}
elseif (-not $systemsscvexists) {
    "No systems.csv found. Downloading."
    Download
}
else {
    $csvAge = (Get-Item D:\temp\systems.csv).LastWriteTime
    # Hash of the existing CSV; CheckSame reads this after a new download.
    $current_hash = Get-FileHash -Path D:\temp\systems.csv
    "Found systems.csv - Checking if there's a newer version available"

    $sinceLastWrite = (Get-Date) - $csvAge
    if ($sinceLastWrite -gt $timespan) {
        "CSV is out of date. Downloading the new one."
        Download
    }
    elseif (-not $systemsoutcsvexists) {
        "Downloaded CSV is up to date, but you dont have the output csv. Fixing that."
        processCSV
    }
    elseif (-not $systemssplitexists) {
        "CSV is downloaded and processed, but the split files don't exist. Fixing that."
        splitCSV
    }
    else {
        "CSV is probably fairly up to date. Check again later"
        notifyDone
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment