Skip to content

Instantly share code, notes, and snippets.

@iAnatoly
Last active June 19, 2018 20:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iAnatoly/efef58ed371950fe90a145e2194d507e to your computer and use it in GitHub Desktop.
Save iAnatoly/efef58ed371950fe90a145e2194d507e to your computer and use it in GitHub Desktop.
How to upload 501M SHA1 hashes into a SQL database
# How to upload 501M SHA1 hashes into a SQL database
#
# Based on:
# https://blog.netnerds.net/2015/01/powershell-high-performance-techniques-for-importing-csv-to-sql-server/
# https://gallery.technet.microsoft.com/scriptcenter/Import-Large-CSVs-into-SQL-216223d9
#
# Database variables
$sqlserver = "YOUR_SERVER"
$database = "YOUR_DATABASE"
$table = "YOUR_TABLE"
# CSV variables
$csvfile = ".\pwned-passwords-ordered-2.0.txt" # downloaded from https://haveibeenpwned.com/Passwords
################################# service functiion in c#. ###############################
#
# Damn you powershell, it all starts so easy - and then you end up writing code like this.
#
$code = @"
//
// https://stackoverflow.com/questions/321370/how-can-i-convert-a-hex-string-to-a-byte-array
//
namespace Helper {
using System;
public class Convert {
public static byte[] StringToByteArrayFastest(string hex)
{
if (hex.Length % 2 == 1)
{
throw new Exception("The binary key cannot have an odd number of digits");
}
byte[] arr = new byte[hex.Length >> 1];
for (int i = 0; i < hex.Length >> 1; ++i)
{
arr[i] = (byte)((GetHexVal(hex[i << 1]) << 4) + (GetHexVal(hex[(i << 1) + 1])));
}
return arr;
}
private static int GetHexVal(char hex)
{
int val = (int)hex;
//For uppercase A-F letters:
return val - (val < 58 ? 48 : 55);
//For lowercase a-f letters:
//return val - (val < 58 ? 48 : 87);
//Or the two combined, but a bit slower:
//return val - (val < 58 ? 48 : (val < 97 ? 55 : 87));
}
}
}
"@;
Add-Type -TypeDefinition $code -Language CSharp
$elapsed = [System.Diagnostics.Stopwatch]::StartNew()
[void][Reflection.Assembly]::LoadWithPartialName("System.Data")
[void][Reflection.Assembly]::LoadWithPartialName("System.Data.SqlClient")
$batchsize = 50000
#connect to sql
$connectionstring = "Data Source=$sqlserver;Integrated Security=true;Initial Catalog=$database;"
$bulkcopy = New-Object Data.SqlClient.SqlBulkCopy($connectionstring, [System.Data.SqlClient.SqlBulkCopyOptions]::TableLock)
$bulkcopy.DestinationTableName = $table
$bulkcopy.bulkcopyTimeout = 0
$bulkcopy.batchsize = $batchsize
# Create the datatable. Add the columns by hand, since we have custom datatypes
$datatable = New-Object System.Data.DataTable
[void]$datatable.Columns.Add("SHA1",[System.Byte[]])
[void]$datatable.Columns.Add("Freq",[System.Int32])
$i=0;
# Read in the data, line by line. No headers in our case.
$reader = New-Object System.IO.StreamReader($csvfile)
while (($line = $reader.ReadLine()) -ne $null)
{
$fields = $line.Split(":")
$row =$datatable.NewRow()
$row.SHA1 = [Helper.Convert]::StringToByteArrayFastest($fields[0])
$row.Freq = [int]$fields[1]
[void]$datatable.Rows.Add($row)
$i++;
if (($i % $batchsize) -eq 0)
{
$bulkcopy.WriteToServer($datatable)
$datatable.Clear()
Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
}
}
# Add in all the remaining rows since the last .Clear()
if($datatable.Rows.Count -gt 0)
{
Write-Host "flushing last chunk of $($datatable.Rows.Count) rows"
$bulkcopy.WriteToServer($datatable)
$datatable.Clear()
Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
}
# Clean Up
$reader.Close(); $reader.Dispose()
$bulkcopy.Close(); $bulkcopy.Dispose()
$datatable.Dispose()
Write-Host "Script complete. $i rows have been inserted into the database."
Write-Host "Total Elapsed Time: $($elapsed.Elapsed.ToString())"
[System.GC]::Collect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment