# Source: GitHub Gist by @jessb321 (gist id c5b3c26c1471474ad6a595bca98d91d1),
# last active December 27, 2016 01:17.
# Functions
# Downloads the latest dump and hands over to `unzip`.
#
# Reads from the caller's scope:
#   $url        - address to download from
#   $output     - path the response body is written to
#   $start_time - DateTime used as the reference point for the "Time taken" message
#
# If the download throws, an error is written and the function returns without
# calling `unzip`, so the rest of the pipeline never runs against a missing or
# stale archive.
Function Download {
    "Downloading the latest dump!"
    $wc = New-Object System.Net.WebClient
    try {
        # NOTE(review): the saved body is later decoded as gzip by `unzip`; a
        # deflate/sdch-encoded response would presumably fail there - confirm
        # what the server actually sends before narrowing this header.
        $wc.Headers.Add('Accept-Encoding', 'gzip, deflate, sdch')
        $wc.DownloadFile($url, $output)
    } catch {
        Write-Error "Download of $url failed: $_"
        return
    } finally {
        # WebClient holds network resources; release them on every path.
        $wc.Dispose()
    }
    # TotalSeconds is the whole elapsed duration; .Seconds is only the 0-59
    # component and under-reports anything that takes a minute or longer.
    $elapsedSeconds = [int][math]::Floor(((Get-Date) - $start_time).TotalSeconds)
    Write-Output "Time taken: $elapsedSeconds second(s)"
    "Finished downloading the latest dump!"
    unzip
}
# Decompresses a gzip file to disk.
#
# Parameters:
#   $infile  - path of the gzip-compressed input file.
#   $outfile - path of the decompressed output file; defaults to $infile with a
#              trailing ".gz" removed. The file is created or overwritten.
#
# Any exception from opening or decompressing the files propagates to the
# caller; all streams that were opened are closed first.
Function DeGZip-File {
    Param(
        $infile = "D:\temp\systems.gz",
        $outfile = ($infile -replace '\.gz$','')
    )
    # Deliberately not named $input: that is a PowerShell automatic variable.
    $sourceStream = $null
    $targetStream = $null
    $gzipStream = $null
    try {
        $sourceStream = New-Object System.IO.FileStream $infile, ([IO.FileMode]::Open), ([IO.FileAccess]::Read), ([IO.FileShare]::Read)
        $targetStream = New-Object System.IO.FileStream $outfile, ([IO.FileMode]::Create), ([IO.FileAccess]::Write), ([IO.FileShare]::None)
        $gzipStream = New-Object System.IO.Compression.GzipStream $sourceStream, ([IO.Compression.CompressionMode]::Decompress)
        # Stream.CopyTo pumps the data until the source is exhausted.
        $gzipStream.CopyTo($targetStream)
    } finally {
        # Close in reverse order of creation; each may still be $null if an
        # earlier constructor threw.
        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
        if ($null -ne $targetStream) { $targetStream.Dispose() }
        if ($null -ne $sourceStream) { $sourceStream.Dispose() }
    }
}
# Decompresses the downloaded archive, deletes it, then runs `CheckSame`.
#
# Reads $infile (archive path) and $outfile (decompressed path) from the
# caller's scope. If decompression throws, the archive is left in place and
# `CheckSame` is not run, so a broken download is never treated as fresh data.
Function unzip {
    "Unzipping the latest dump, and removing the zipped file."
    try {
        DeGZip-File $infile $outfile
    } catch {
        Write-Error "Failed to decompress ${infile}: $_"
        return
    }
    # Delete the same file that was just decompressed rather than a separate
    # hard-coded copy of its path.
    Remove-Item -LiteralPath $infile
    CheckSame
}
# Compares the hash of D:\temp\systems.csv with $current_hash (read from the
# caller's scope) and runs `processCSV` only when they differ.
#
# When $current_hash is not set there is nothing to compare against, so the
# file is always processed.
Function CheckSame {
    $new_hash = Get-FileHash -Path D:\temp\systems.csv
    # Compare the digest strings with -eq. A single "=" would assign
    # $current_hash to $new_hash and test its truthiness, and -eq on the two
    # result objects themselves would not compare the digests.
    $unchanged = ($null -ne $current_hash) -and ($new_hash.Hash -eq $current_hash.Hash)
    if ($unchanged) {
        "No need to update - Current file is the same as the old one."
    } else {
        processCSV
    }
}
# Reduces D:\temp\systems.csv to the name/x/y/z columns, writes the result as a
# semicolon-delimited file to D:\temp\systemsout.csv, then runs `splitCSV` and
# `notifyDone`.
Function processCSV {
    "Processing CSV, then splitting it."
    $exportArguments = @{
        Path              = 'D:\temp\systemsout.csv'
        Delimiter         = ';'
        NoTypeInformation = $true
    }
    # A per-row Write-Progress was tried here by the original author and
    # removed because it slowed the export down considerably.
    Import-Csv -Path 'D:\temp\systems.csv' |
        Select-Object -Property name, x, y, z |
        Export-Csv @exportArguments
    splitCSV
    notifyDone
}
# Announces completion: a BurntToast notification when that module is
# available, otherwise a plain message on the output stream.
Function notifyDone {
    $toastModule = Get-Module -ListAvailable -Name BurntToast
    if (-not $toastModule) {
        "Done! Finished!"
        return
    }
    Import-Module BurntToast
    New-BurntToastNotification -Text "Finished!", 'The CSV script is finished. Check your PS window for more info.'
}
# Splits a text file into numbered chunk files of at most $linesperFile lines.
#
# Parameters (all optional; the defaults are the values the script has always used):
#   $filename     - file to split.
#   $rootName     - path prefix of the chunk files; a three-digit counter
#                   starting at 001, a dot and $ext are appended.
#   $ext          - extension of the chunk files, without the dot.
#   $linesperFile - maximum number of lines per chunk; must be at least 1.
#
# Lines are copied verbatim in order, so only the first chunk contains the
# first line of the input. Progress messages are emitted on the output stream
# and the elapsed time is written to the host when the split completes.
Function splitCSV {
    Param(
        $filename = "D:\temp\systemsout.csv",
        $rootName = "D:\temp\systemsSplit",
        $ext = "csv",
        # A value below 1 would never consume a line and would create files forever.
        [ValidateRange(1, [int]::MaxValue)]
        [int]$linesperFile = 230000
    )
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $filecount = 1
    $reader = $null
    $writer = $null
    try {
        $reader = [io.file]::OpenText($filename)
        "Creating file number $filecount"
        $writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName, $filecount.ToString("000"), $ext))
        $filecount++
        $linecount = 0
        while (-not $reader.EndOfStream) {
            "Reading $linesperFile"
            while (($linecount -lt $linesperFile) -and (-not $reader.EndOfStream)) {
                $writer.WriteLine($reader.ReadLine())
                $linecount++
            }
            # Only roll over to a new chunk when there is more input, so the
            # split never ends with an empty trailing file.
            if (-not $reader.EndOfStream) {
                "Closing file"
                $writer.Dispose()
                "Creating file number $filecount"
                $writer = [io.file]::CreateText("{0}{1}.{2}" -f ($rootName, $filecount.ToString("000"), $ext))
                $filecount++
                $linecount = 0
            }
        }
    } finally {
        # Either handle is still $null if opening/creating its file threw;
        # guard so cleanup does not raise a second, misleading error.
        if ($null -ne $writer) { $writer.Dispose() }
        if ($null -ne $reader) { $reader.Dispose() }
    }
    $sw.Stop()
    Write-Host "Split complete in " $sw.Elapsed.TotalSeconds "seconds"
}
# Vars
# Download source/target and the reference time for the "Time taken" message.
$url = "https://eddb.io/archive/v5/systems.csv"
$output = "D:\temp\systems.gz"
$start_time = Get-Date

# Archive and decompressed file handed to DeGZip-File by `unzip`.
$infile="D:\temp\systems.gz"
$outfile="D:\temp\systems.csv"

# Artifacts whose presence decides which stage to resume from.
$ChkTemp = "D:\temp"
$ChkSystemsCSV = "D:\temp\systems.csv"
$ChkOutCSV = "D:\temp\systemsout.csv"
$ChkSplit = "D:\temp\systemsSplit*.csv"
$tempexists = Test-Path $ChkTemp
$systemsscvexists = Test-Path $ChkSystemsCSV
$systemsoutcsvexists = Test-Path $ChkOutCSV
$systemssplitexists = Test-Path $ChkSplit

# A downloaded CSV older than this is considered out of date.
$timespan = New-TimeSpan -Days 1

# Lets Go!
"Starting at $start_time"
if (-not $tempexists) {
    # Nothing exists yet: create the working directory and start from scratch.
    New-Item -ItemType Directory -Path $ChkTemp
    Download
} elseif (-not $systemsscvexists) {
    "No systems.csv found. Downloading."
    Download
} else {
    $csvAge = (Get-Item $ChkSystemsCSV).LastWriteTime
    # Hash of the existing CSV; CheckSame reads it after a fresh download.
    $current_hash = Get-FileHash -Path $ChkSystemsCSV
    "Found systems.csv - Checking if there's a newer version available"
    if (((Get-Date) - $csvAge) -gt $timespan) {
        "CSV is out of date. Downloading the new one."
        Download
    } elseif (-not $systemsoutcsvexists) {
        "Downloaded CSV is up to date, but you dont have the output csv. Fixing that."
        processCSV
    } elseif (-not $systemssplitexists) {
        "CSV is downloaded and processed, but the split files don't exist. Fixing that."
        splitCSV
    } else {
        "CSV is probably fairly up to date. Check again later"
        notifyDone
    }
}
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment