Created
April 2, 2018 16:44
-
-
Save llamasoft/241b1343bd6bef91e0461c3e8d985f29 to your computer and use it in GitHub Desktop.
HaveIBeenPwned Found Plaintext Frequency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# HaveIBeenPwned found-plaintext frequency.
#
# Usage: run with no arguments, or pass [PLAINS_FILE [HIBP_FILE]] to override
# the input locations.
#
# Inputs (defaults, looked up in the current directory):
#   plains.txt          the "found" list from Hashes.org
#                       https://hashes.org/leaks.php?id=515
#   HIBP_hashcount.txt  the Version 2 *ordered by hash* list from
#                       haveibeenpwned.com
#                       https://haveibeenpwned.com/Passwords
#
# Outputs (written to the current directory):
#   hash_plains.txt     "HASH:plain" lines, sorted by hash
#   count_plains.txt    "count plain" lines, sorted by count, descending

# No `set -e`: every stage is checked explicitly below and the script's exit
# status is the status of `main`.
set -uo pipefail

# Use bytewise locale, this provides performance benefits, prevents possible
# UTF8 warnings, and makes `sort` and `join` agree on the collation order.
export LC_ALL="C"

readonly DEFAULT_PLAINS="plains.txt"
readonly DEFAULT_HIBP="HIBP_hashcount.txt"
readonly HASH_PLAINS_OUT="hash_plains.txt"
readonly COUNT_PLAINS_OUT="count_plains.txt"

# Print a diagnostic to stderr.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Regenerate the SHA1 hash of every plaintext.
# Input:   stdin, one plaintext per line. Some plains are in hashcat
#          $HEX[...] format and are decoded before hashing.
# Output:  stdout, "HASH:plain" with the plaintext exactly as it was read,
#          sorted by hash (the input is mostly, but not entirely, sorted).
# The hash is upper-cased because sha1_hex returns lowercase hex while the
# HIBP dump uses uppercase; `join` is case-sensitive, so without this the two
# lists never pair up. Digest::SHA is a core Perl module (Digest::SHA1 is not).
hash_plains() {
  perl -MDigest::SHA=sha1_hex -ne '
    chomp;
    my $plain = $_;
    my $raw = ($plain =~ /^\$HEX\[([0-9a-fA-F]+)\]$/) ? pack("H*", $1) : $plain;
    print uc(sha1_hex($raw)), ":", $plain, "\n";
  ' | sort -t ':' -k1,1
}

# Join the HIBP counts with the hashed plaintexts and rank them.
# Arguments: $1 - HIBP "HASH:count" file, ordered by hash
#            $2 - "HASH:plain" file produced by hash_plains
# Output:    stdout, "count plain", sorted on count, descending.
rank_by_count() {
  local hibp_file=$1
  local hashed_file=$2

  # The HIBP hash list has trailing spaces and carriage returns; strip those
  # on-the-fly. Upper-casing is a no-op for the official dump and keeps the
  # join key canonical otherwise (it does not disturb the bytewise hex order).
  # `join` emits hash:count:plain; we no longer want the hash, so `cut` it
  # out, then turn the first remaining ':' (the one after the count) into a
  # space so plaintexts containing ':' survive intact.
  tr -d ' \r' < "$hibp_file" \
    | tr 'a-f' 'A-F' \
    | join -j 1 -t ':' - "$hashed_file" \
    | cut -d ':' -f 2- \
    | sed 's/:/ /' \
    | sort -k1,1rn -k2b
}

# Entry point.
# Arguments: $1 - plaintext list (default: plains.txt)
#            $2 - HIBP hash:count list (default: HIBP_hashcount.txt)
# Returns:   0 on success, 1 if an input is unreadable or a stage fails.
main() {
  local plains_file=${1:-$DEFAULT_PLAINS}
  local hibp_file=${2:-$DEFAULT_HIBP}
  local input

  for input in "$plains_file" "$hibp_file"; do
    if [[ ! -r "$input" ]]; then
      warn "cannot read input file: $input"
      return 1
    fi
  done

  if ! hash_plains < "$plains_file" > "$HASH_PLAINS_OUT"; then
    warn "failed to hash plaintexts from $plains_file"
    return 1
  fi

  if ! rank_by_count "$hibp_file" "$HASH_PLAINS_OUT" > "$COUNT_PLAINS_OUT"; then
    warn "failed to join $hibp_file with $HASH_PLAINS_OUT"
    return 1
  fi

  # If all goes well, your output file should look something like this:
  #   20760336 123456
  #   7016669 123456789
  #   3599486 qwerty
  #   3303003 password
  #   2900049 111111
  head -n 5 "$COUNT_PLAINS_OUT"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment