-
-
Save m33x/3e0ab19a53384c036db29f996cb60733 to your computer and use it in GitHub Desktop.
Some tools we developed to deal with incorrectly parsed strings in the HIBP 320m dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ echo -n "password" | perl hexit.pl | |
$HEX[64] | |
$HEX[7264] | |
$HEX[6f7264] | |
$HEX[776f7264] | |
$HEX[73776f7264] | |
$HEX[7373776f7264] | |
$HEX[617373776f7264] | |
$HEX[70617373776f7264] | |
$ cat hexit.pl | |
#!/usr/bin/env perl | |
# For every line on STDIN, print each of its suffixes (shortest first)
# hex-encoded and wrapped in the $HEX[...] notation.
while (defined(my $line = <STDIN>)) {
    chomp $line;
    # Move the suffix start from the last character back to the first;
    # an empty line yields an empty range and prints nothing.
    foreach my $start (reverse 0 .. length($line) - 1) {
        my $hex = unpack("H*", substr($line, $start));
        print "\$HEX[" . $hex . "]" . "\n";
    }
}
$ echo -n "password" | perl r2l.pl | |
d | |
rd | |
ord | |
word | |
sword | |
ssword | |
assword | |
password | |
$ cat r2l.pl | |
#!/usr/bin/env perl | |
# For every line on STDIN, print each of its suffixes, shortest first.
while (defined(my $line = <STDIN>)) {
    chomp $line;
    # Count the suffix start down from the last character to index 0.
    my $start = length($line);
    while ($start-- > 0) {
        print substr($line, $start) . "\n";
    }
}
$ echo "A password is a word or string of characters used for user authentication to prove identity" > demo | |
$ gcc -O3 r40.c -o r40 | |
$ ./r40 < demo | |
A password is a word or string of charac | |
password is a word or string of charact | |
password is a word or string of characte | |
assword is a word or string of character | |
ssword is a word or string of characters | |
sword is a word or string of characters | |
word is a word or string of characters u | |
ord is a word or string of characters us | |
... | |
$ cat r40.c | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <fcntl.h> | |
// Most of the code was shamelessly stolen from hashcat-utils | |
// https://github.com/hashcat/hashcat-utils | |
// Rotates the first `len` bytes of `s` one position to the left, in place:
// the first byte moves to the end and every other byte shifts down by one.
//   'authentication to prove identity or acce'
//   'uthentication to prove identity or accea'
// s   - buffer holding at least `len` bytes; bytes past `len` (including any
//       terminating NUL) are left untouched.
// len - number of bytes to rotate; values below 2 leave the buffer unchanged.
void strrotl (char *s, int len) {
  // Nothing to rotate; this also keeps the size arithmetic below non-negative
  // and avoids computing a pointer in front of the buffer.
  if (len < 2) {
    return;
  }

  const char first = s[0];

  // Source and destination overlap, so memmove (not memcpy) is required.
  memmove (s, s + 1, (size_t) len - 1);
  s[len - 1] = first;
}
// Strips line terminators from the end of a buffer, in place.
// First every trailing '\n' is overwritten with NUL, then every '\r' that is
// trailing at that point, so "abc\r\n" becomes "abc". Each pass runs once, so
// a terminator hidden behind the other kind (the "\r\n" in "abc\r\n\r") stays.
// s   - buffer to trim; only the first `len` bytes are inspected.
// len - number of bytes currently in `s`, not counting the terminator.
// Returns the length that remains after trimming.
size_t super_chop (char *s, size_t len) {
  // Index from the front instead of keeping a pointer to the last byte:
  // `s + len - 1` would point in front of the buffer when len == 0.
  while (len > 0 && s[len - 1] == '\n') {
    s[--len] = 0;
  }

  while (len > 0 && s[len - 1] == '\r') {
    s[--len] = 0;
  }

  return len;
}
// Reads the next line from `stream` into `buf` and strips its line ending.
// stream - input stream to read from.
// sz     - capacity of `buf` in bytes, handed to fgets as its limit.
// buf    - destination buffer.
// Returns the length of the line after super_chop has removed trailing
// '\n' / '\r' bytes, or -1 when the stream is at end-of-file or fgets
// returns NULL.
int fgetl (FILE *stream, size_t sz, char *buf) {
  // Give up on an already exhausted stream or a failed read.
  if (feof (stream) || fgets (buf, sz, stream) == NULL) {
    return -1;
  }

  return super_chop (buf, strlen (buf));
}
// Reads text from stdin and prints every 40-character window of it, sliding
// forward one character at a time. Line endings are dropped and empty lines
// contribute nothing, so windows run across line boundaries.
// Takes no command-line arguments; if any are given, the usage text is
// printed to stderr and -1 is returned. Returns 0 otherwise.
int main (int argc, char *argv[]) {
  enum { WINDOW_LEN = 40 };

  // Print usage help
  if (argc != 1) {
    fprintf (stderr, "usage: %s < infile > outfile\n", argv[0]);
    return (-1);
  }

  // On Windows, switch stdin to binary (untranslated) mode so that
  // carriage return-line feed (CR-LF) pairs are not translated on input.
#ifdef _WINDOWS
  _setmode (_fileno (stdin), _O_BINARY);
#endif

  char line_buf[BUFSIZ];
  // Zero-filled so that a window which never fills up is still a properly
  // terminated string when it is printed after the loop.
  char output_buf[WINDOW_LEN + 1] = { 0 };
  int line_len;
  int pos = 0; // number of characters collected so far, capped at WINDOW_LEN

  // Go through all lines and feed them into the window char by char
  while ((line_len = fgetl (stdin, BUFSIZ, line_buf)) != -1) {
    for (int i = 0; i < line_len; i++) {
      if (pos >= WINDOW_LEN) {
        // Window is full: print it, then slide by one. The rotation moves
        // the oldest char to the last slot, which the new char overwrites.
        puts (output_buf);
        strrotl (output_buf, WINDOW_LEN);
        output_buf[WINDOW_LEN - 1] = line_buf[i];
      } else {
        // Still filling the first window
        output_buf[pos++] = line_buf[i];
      }
    }
  }

  // Print the last (possibly partial) window once EOF is reached; skip it
  // when the input contained no characters at all.
  if (pos > 0) {
    puts (output_buf);
  }

  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment