Skip to content

Instantly share code, notes, and snippets.

@whym
Created December 27, 2010 06:11
Show Gist options
  • Save whym/755908 to your computer and use it in GitHub Desktop.
Save whym/755908 to your computer and use it in GitHub Desktop.
bgrep: a binary grep for fixed-size blocks
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
// Default output path prefix; main() appends a running match number to it
// to build the file name of each matched block (overridden with -o).
#define PREFIX "/tmp/frag."
// Default number of bytes read from standard input per block (overridden
// with -b).
#define BLOCKSIZE 16384
/**
 * Search the byte range [a, a + as) for the first occurrence of the byte
 * sequence [b, b + bs).
 *
 * Both ranges are compared as raw bytes with memcmp, so embedded NUL
 * characters are handled like any other byte.
 *
 * @param a   start of the buffer to search
 * @param b   start of the pattern to look for
 * @param as  number of bytes available at a
 * @param bs  number of bytes in the pattern
 * @return a pointer to the byte just PAST the end of the first match (this
 *         is the historical convention of this function; the pointer equals
 *         a + as when the match ends the buffer and must not be
 *         dereferenced in that case), a itself when the pattern is empty,
 *         or NULL when the pattern does not occur.
 */
const char* bytesbytes(const char* a, const char* b, size_t as, size_t bs) {
    // An empty pattern trivially matches at the very beginning; answering
    // here also avoids ever reading b[0] from a zero-length pattern.
    if ( bs == 0 ) {
        return a;
    }
    // A pattern longer than the buffer can never fit.
    if ( as < bs ) {
        return NULL;
    }
    // Try every start position that leaves room for the whole pattern.
    // Restarting the comparison at each position (rather than carrying a
    // partial-match cursor forward) is what makes overlapping candidates
    // such as "aab" inside "aaab" get found.
    const size_t last_start = as - bs;
    for ( size_t start = 0; start <= last_start; ++start ) {
        if ( memcmp(a + start, b, bs) == 0 ) {
            return a + start + bs;
        }
    }
    return NULL;
}
// Write the usage summary to standard output. The two format placeholders
// are filled with the compiled-in defaults PREFIX and BLOCKSIZE.
void print_help() {
    static const char usage[] =
        "bgrep -p PATTERN [-o PREFIX] [-b BLOCK_SIZE] < FILE\n"
        "bgrep: a binary grep for fixed-size blocks\n"
        " -p PATTERN\n"
        " Specify the pattern you look for.\n"
        " -o PREFIX [default: %s]\n"
        " Matched blocks are written as ${PREFIX}1, ${PREFIX}2, etc.\n"
        " -b BLOCK_SIZE [default: %d]\n"
        " Specify the length of a block in bytes.\n";
    printf(usage, PREFIX, BLOCKSIZE);
}
// Parse text as a strictly positive base-10 integer.
// Returns true and stores the value in out on success; returns false (and
// leaves out untouched) for empty, non-numeric, trailing-garbage, zero or
// negative input.
static bool parse_positive(const char* text, long& out) {
    char* end = NULL;
    const long value = std::strtol(text, &end, 10);
    if ( end == text || *end != '\0' || value <= 0 ) {
        return false;
    }
    out = value;
    return true;
}

/**
 * bgrep entry point.
 *
 * Reads standard input in consecutive blocks of a fixed size and writes
 * every block that contains the pattern to its own file, named by the
 * output prefix followed by a running match number (1, 2, ...). For each
 * matched block a diagnostic line is printed to standard error.
 *
 * Options:
 *   -p PATTERN     bytes to look for (required, non-empty)
 *   -o PREFIX      output file name prefix (default PREFIX)
 *   -b BLOCK_SIZE  block length in bytes, must be > 0 (default BLOCKSIZE)
 *   -s VALUE       accepted for command-line compatibility; the value
 *                  currently has no effect
 *   -h             print usage and exit
 *
 * Each block is searched independently, so a pattern that straddles two
 * blocks is not detected.
 *
 * @return 0 on success; 1 on a usage error or when an output file cannot
 *         be created or written.
 */
int main(int argc, char** argv) {
    long blocksize = BLOCKSIZE;
    const char* output = PREFIX;
    const char* pattern = "";

    // Start at 1: argv[0] is the program name, not an option.
    for ( int i = 1; i < argc; ++i ) {
        const char* arg = argv[i];

        if ( std::strcmp("-h", arg) == 0 ) {
            print_help();
            return 0;
        }

        const bool wants_value = std::strcmp("-b", arg) == 0
                              || std::strcmp("-s", arg) == 0
                              || std::strcmp("-p", arg) == 0
                              || std::strcmp("-o", arg) == 0;
        if ( wants_value ) {
            // Without this check a trailing flag would hand a null pointer
            // to the consumers below and step past the end of argv.
            if ( i + 1 >= argc ) {
                std::cerr << "missing value for " << arg << std::endl;
                return 1;
            }
            const char* value = argv[++i];
            switch ( arg[1] ) {
            case 'b':
                // A zero or negative size would make every read a no-op
                // (looping forever) or make the allocation fail.
                if ( !parse_positive(value, blocksize) ) {
                    std::cerr << "invalid block size: " << value << std::endl;
                    return 1;
                }
                break;
            case 's':
                // Accepted but unused, as it always has been.
                (void)value;
                break;
            case 'p':
                pattern = value;
                break;
            case 'o':
                output = value;
                break;
            }
            continue;
        }

        // Unknown flags are reported but do not stop the run; other
        // positional arguments are silently ignored.
        if ( arg[0] == '-' ) {
            std::cerr << "undefined argument: " << arg << std::endl;
        }
    }

    if ( std::strcmp("", pattern) == 0 ) {
        std::cerr << "no pattern given" << std::endl;
        return 1;
    }
    if ( std::strcmp("", output) == 0 ) {
        std::cerr << "output is empty" << std::endl;
        return 1;
    }

    const size_t patlen = std::strlen(pattern);
    // The vector owns the block buffer, so it is released on every exit path.
    std::vector<char> buff(static_cast<size_t>(blocksize));
    // Running count of bytes consumed from standard input. Tracked by hand
    // because tellg() reports -1 on pipes and once the stream has hit EOF.
    unsigned long long consumed = 0;
    int n = 0;

    // Looping on the stream state (not just eof()) also terminates when the
    // stream fails for a reason other than end of input.
    while ( std::cin ) {
        std::cin.read(&buff[0], static_cast<std::streamsize>(blocksize));
        const std::streamsize got = std::cin.gcount();
        if ( got <= 0 ) {
            break;
        }
        consumed += static_cast<unsigned long long>(got);

        const char* p = bytesbytes(&buff[0], pattern,
                                   static_cast<size_t>(got), patlen);
        if ( p == NULL ) {
            continue;
        }

        ++n;
        std::ostringstream fname;
        fname << output << n;
        const std::string path = fname.str();

        // Format: <file>: <bytes in block> at <offset of the pointer
        // returned by bytesbytes within the block>, <total bytes read>.
        std::cerr << path << ": " << got << " at " << (p - &buff[0])
                  << ", " << consumed << std::endl;

        // Binary mode keeps the block bytes intact on platforms that
        // translate line endings in text mode.
        std::ofstream fout(path.c_str(), std::ios::out | std::ios::binary);
        if ( fout.fail() ) {
            std::cerr << "fail in " << path << std::endl;
            return 1;
        }
        fout.write(&buff[0], got);
        fout.close();
        if ( fout.fail() ) {
            std::cerr << "fail in " << path << std::endl;
            return 1;
        }
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment