// bgrep: a binary grep for fixed-size blocks
// Source: GitHub gist whym/755908, created December 27, 2010 06:11.
// Hosting-page notice: this file contains bidirectional Unicode text that may be
// interpreted or compiled differently than what appears below. To review, open the
// file in an editor that reveals hidden Unicode characters.
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
// Default path prefix for output files; the running match number is appended to it.
#define PREFIX "/tmp/frag."
// Default block length in bytes.
#define BLOCKSIZE 16384
// Searches the byte range a[0, as) for the first occurrence of the byte
// sequence b[0, bs). Both ranges are treated as raw bytes, so embedded '\0'
// characters are compared like any other value.
//
// Parameters:
//   a, as - start and length of the buffer to search.
//   b, bs - start and length of the pattern to look for.
//
// Returns a pointer to the first byte of the first occurrence inside `a`, or
// NULL when the pattern does not occur. An empty pattern (bs == 0) matches at
// the start of the buffer, mirroring strstr().
const char* bytesbytes(const char* a, const char* b, size_t as, size_t bs) {
    if ( bs == 0 ) {
        return a;
    }
    if ( bs > as ) {
        return NULL;
    }
    // Last start index at which the whole pattern still fits in the buffer;
    // this lets a match that ends exactly at the buffer end be found.
    const size_t last_start = as - bs;
    for ( size_t i = 0; i <= last_start; ++i ) {
        // Every start position is tried independently, so a failed partial
        // match never hides a later, overlapping candidate.
        if ( a[i] == b[0] && std::memcmp(a + i, b, bs) == 0 ) {
            return a + i;
        }
    }
    return NULL;
}
// Writes the usage summary to standard output, including the default values
// taken from the PREFIX and BLOCKSIZE macros.
void print_help() {
    // std::cout comes from <iostream>, which this file includes.
    std::cout <<
        "bgrep -p PATTERN [-o PREFIX] [-b BLOCK_SIZE] < FILE\n"
        "bgrep: a binary grep for fixed-size blocks\n"
        " -p PATTERN\n"
        " Specify the pattern you look for.\n"
        " -o PREFIX [default: " << PREFIX << "]\n"
        " Matched blocks are written as ${PREFIX}1, ${PREFIX}2, etc.\n"
        " -b BLOCK_SIZE [default: " << BLOCKSIZE << "]\n"
        " Specify the length of a block in bytes.\n"
        " -h\n"
        " Show this help and exit.\n";
}
int main(int argc, char** argv) { | |
using namespace std; | |
int blocksize = BLOCKSIZE; | |
int offset = 0; | |
const char* output = PREFIX; | |
const char* pattern = ""; | |
for (; *argv != NULL; ++argv) { | |
if ( strcmp("-b", *argv) == 0 ) { | |
++argv; | |
blocksize = atoi(*argv); | |
continue; | |
} | |
if ( strcmp("-s", *argv) == 0 ) { | |
++argv; | |
offset = atoi(*argv); | |
continue; | |
} | |
if ( strcmp("-p", *argv) == 0 ) { | |
++argv; | |
pattern = *argv; | |
continue; | |
} | |
if ( strcmp("-o", *argv) == 0 ) { | |
++argv; | |
output = *argv; | |
continue; | |
} | |
if ( strcmp("-h", *argv) == 0 ) { | |
print_help(); | |
return 0; | |
} | |
if ( *argv[0] == '-' ) { | |
cerr << "undefined argument: " << *argv << endl; | |
} | |
} | |
if ( strcmp("", pattern) == 0 ) { | |
cerr << "no pattern given" << endl; | |
return 1; | |
} | |
if ( strcmp("", output) == 0 ) { | |
cerr << "output is empty" << endl; | |
return 1; | |
} | |
int patlen = strlen(pattern); | |
char* buff = new char[blocksize]; | |
int n = 0; | |
ofstream fout; | |
while ( !cin.eof() ) { | |
cin.read(buff, blocksize); | |
int buffcount = cin.gcount(); | |
const char * p = bytesbytes(buff, pattern, buffcount, patlen); | |
if ( p != NULL ) { | |
++n; | |
ostringstream fname(""); | |
fname << string(output) << n; | |
fout.open(fname.str().c_str()); | |
cerr << fname.str() << ": " << buffcount << " at " << (p - buff) << ", " << cin.tellg() << endl; | |
if ( fout.fail() ) { | |
cerr << "fail in " << fname << endl; | |
fout.close(); | |
return 1; | |
} | |
fout.write(buff, buffcount); | |
fout.close(); | |
} | |
} | |
} |
// Sign up for free to join this conversation on GitHub.
// Already have an account? Sign in to comment.