Skip to content

Instantly share code, notes, and snippets.

@oschonrock
Last active January 25, 2019 00:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oschonrock/b76b1fbf4e01bfe6b30b8a42921bf38d to your computer and use it in GitHub Desktop.
Save oschonrock/b76b1fbf4e01bfe6b30b8a42921bf38d to your computer and use it in GitHub Desktop.
<?php
/**
* intention of this class is not to do anything too fancy
* it basically just runs one query, but it does so with ease and in a scalable and safe way
* ie it won't block, you don't have to worry about setting up a new connection
* it will reuse the connection and the prepared statement if you make multiple calls
* been tested and refined in high volume production for 1 year.
*
* to use it just customise the __contrsuct below with your DB details and call:
*
* Hibp::isPawned($password)
*
* which retruns a boolean
*
*/
class Hibp extends PDO
{
private static $con;
private static $stmt;
// see here for notes on how to load this DB from the source (delimited text file)
// https://gist.github.com/oschonrock/b76b1fbf4e01bfe6b30b8a42921bf38d
public function __construct(String $dsn = 'mysql:dbname=hibp;host=hostname',
String $username = 'username', String $passwd = 'password',
Array $options = [])
{
return parent::__construct($dsn, $username, $passwd, $options);
}
// public only for testing
public static function getConnection()
{
if (!self::$con instanceof self)
{
self::$con = new self();
// experimemt with getting int and float values from mysqlnd, and always use FETCH_OBJ, because it has the nicest syntax
self::$con->setAttribute(PDO::ATTR_STRINGIFY_FETCHES, false);
self::$con->setAttribute(PDO::ATTR_EMULATE_PREPARES, false);
self::$con->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ);
}
return self::$con;
}
private static function getStmt()
{
if (!self::$stmt instanceof PDOStatement)
{
$con = self::getConnection();
self::$stmt = $con->prepare("SET STATEMENT max_statement_time=1 FOR
select SQL_NO_CACHE cnt from pw where hash = :sha1_password limit 1;");
}
return self::$stmt;
}
public static function isPawned(String $plaintext_password)
{
try
{
$stmt = self::getStmt();
}
catch (PDOException $e)
{
return false; // if db not found, then assume pw is not pawned
}
if ($stmt === false)
{
return false; // if stmt prepare fails, then assume pw is not pawned
}
$bind_ok = $stmt->bindValue(':sha1_password', sha1($plaintext_password, true), PDO::PARAM_STR);
if ($bind_ok === false)
{
return false; // if stmt->bindValue fails, then assume pw is not pawned
}
$stmt->execute();
if ($stmt === false)
{
return false; // if stmt->execute fails, then assume pw is not pawned
}
$cnt = $stmt->fetchColumn();
if ($cnt === false)
{
return false; // if empty result, then pw is not pawned
}
// if count is 1 or more (true by definition really if we get here), then password has been pawned
// $cnt is already type int due to PDO::ATTR_STRINGIFY_FETCHES = false
return $cnt > 0;
}
}
-- spent some time finding a good table structure and a fastest way to load the "Have I Been pawned" https://haveibeenpwned.com/Passwords
-- list of compromised passwords into a MariaDB or MySQL database for fast, unlimited, local querying
drop table if exists pw;
-- store the sha1 hashes in a binary(20) column (most space efficient)
-- use that column as the PK
-- PK uses default BTREE type index, because HASH type is not available for disk based storage engines (only MEMORY/HEAP)
-- don't add PK until after LOAD DATE INFILE is complete, because that is about twice as fast
-- use Aria engine with no transactions for MariaDB, use MyIsam for MySQL, because these are faster than InnoDB and totally sufficient
create table pw (
hash binary(20) not null,
cnt int(11) not null
) engine=aria TRANSACTIONAL=0 TABLE_CHECKSUM=0;
-- tell load data infile about the windows line-ends in the source file
-- specify ":" as the field separator
-- use local variables for each row and do some simple data transformation
-- trim(@cnt) gets rid of spaces at end of each line and prevents "truncation" warnings for the cnt column
-- HIBP is now much better about ensuring their cnt values do not have redundant spaces, but we leave the trim() as
-- it doesn't hurt.
-- unhex(@txthash) transforms the "hexdgit format" of the sha1 hashes into binary for the "hash" column
load data infile 'pwned-passwords-ordered-by-hash.txt' into table pw fields terminated by ':' LINES TERMINATED BY '\r\n'
(@txthash, @cnt)
set hash = unhex(@txthash), cnt = trim(@cnt);
-- takes about 30mins, on Core i5 with spinning disks (seems evenly CPU and I/O bound on this machine)
alter table pw add primary key(hash);
-- takes about 30mins on same machine (CPU bound on this hardware)
-- ensure you use the "sha1-..-ordered-by-hash" version of the 7z/txt file, as this is about 3x as fast (already nearly sorted)
-- querying the table with a plaintext password of "password"
-- using SQL_NO_CACHE to prove this is not cached, and it would probably be pointless
-- using SET STATEMENT max_statement_time=1 FOR syntax is mariadb specific,
-- but ensures we don't block during login for more than 1 second (eg during inserting a new copy of HIBP DB)
SET STATEMENT max_statement_time=1 FOR
select SQL_NO_CACHE cnt from pw where hash = unhex(sha1('password'));
+---------+
| cnt |
+---------+
| 3303003 |
+---------+
1 row in set (0.01 sec)
-- Space usage Data 17.1 GiB
-- Index 11.3 GiB
-- Total 28.4 GiB
-- where to put this table?
-- Recommend to put it a separate database from your main application
-- this way it won't inflate your backups or copies for dev environments
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment