@little-brother
Last active September 6, 2021 18:17
Undark (fork) - SQLite deleted and corrupted data recovery tool
// Forked from http://pldaniels.com/undark/
// Build: gcc undark.c -o undark.exe -lws2_32 -s
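//
// Illustrative usage (assumed from the option list below, not taken from any
// original documentation; file names are placeholders):
//   ./undark -i damaged.sqlite > recovered.csv
//   ./undark -i damaged.sqlite --freespace --cellcount-min=3 > freespace_rows.csv
// Recovered rows are written to stdout as comma-separated values; blobs larger
// than --blob-size-limit are written out as numbered *.blob files.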
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <errno.h> /* errno, used with strerror() below */
#ifndef _WIN32
#include <sys/mman.h>
#include <unistd.h> /* write(), close() */
#include <arpa/inet.h> /* ntohl(), ntohs() */
#else
// https://gist.github.com/r-lyeh-archived/bc29c8630dd778454001
#include <windows.h> /* pulls in winsock.h, which supplies ntohl()/ntohs() here */
#include <io.h> /* _get_osfhandle(), open(), write(), close() */
#define PROT_READ 0x1
#define PROT_WRITE 0x2
/* This flag is only available in WinXP+ */
#ifdef FILE_MAP_EXECUTE
#define PROT_EXEC 0x4
#else
#define PROT_EXEC 0x0
#define FILE_MAP_EXECUTE 0
#endif
#define MAP_SHARED 0x01
#define MAP_PRIVATE 0x02
#define MAP_ANONYMOUS 0x20
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FAILED ((void *) -1)
#ifdef __USE_FILE_OFFSET64
# define DWORD_HI(x) (x >> 32)
# define DWORD_LO(x) ((x) & 0xffffffff)
#else
# define DWORD_HI(x) (0)
# define DWORD_LO(x) (x)
#endif
static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
return MAP_FAILED;
if (fd == -1) {
if (!(flags & MAP_ANON) || offset)
return MAP_FAILED;
} else if (flags & MAP_ANON)
return MAP_FAILED;
DWORD flProtect;
if (prot & PROT_WRITE) {
if (prot & PROT_EXEC)
flProtect = PAGE_EXECUTE_READWRITE;
else
flProtect = PAGE_READWRITE;
} else if (prot & PROT_EXEC) {
if (prot & PROT_READ)
flProtect = PAGE_EXECUTE_READ;
else if (prot & PROT_EXEC)
flProtect = PAGE_EXECUTE;
} else
flProtect = PAGE_READONLY;
off_t end = length + offset;
HANDLE mmap_fd, h;
if (fd == -1)
mmap_fd = INVALID_HANDLE_VALUE;
else
mmap_fd = (HANDLE)_get_osfhandle(fd);
h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL);
if (h == NULL)
return MAP_FAILED;
DWORD dwDesiredAccess;
if (prot & PROT_WRITE)
dwDesiredAccess = FILE_MAP_WRITE;
else
dwDesiredAccess = FILE_MAP_READ;
if (prot & PROT_EXEC)
dwDesiredAccess |= FILE_MAP_EXECUTE;
if (flags & MAP_PRIVATE)
dwDesiredAccess |= FILE_MAP_COPY;
void *ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length);
if (ret == NULL) {
CloseHandle(h);
ret = MAP_FAILED;
}
return ret;
}
static void munmap(void *addr, size_t length)
{
UnmapViewOfFile(addr);
/* ruh-ro, we leaked handle from CreateFileMapping() ... */
}
#undef DWORD_HI
#undef DWORD_LO
#endif
// Decodes a big-endian SQLite varint at varint_p: the result goes into *result,
// *end (if given) is left pointing just past the varint, and the number of
// bytes consumed (1-9) is returned.
int varint_decode(uint64_t *result, char *varint_p, char **end) {
char *p;
int length;
uint64_t value;
p = varint_p;
length = 0;
value = 0;
for (;;) {
if (length == 8) {
// the 9th byte of a varint carries a full 8 bits and has no continuation flag
value = (value << 8) | (uint8_t)(*p);
length++;
break;
}
value = (value << 7) | (*p & 0x7f); // 7 data bits per byte, most significant group first
length++;
if ((*p & 0x80) == 0x0) {
break;
}
p++;
}
if (end != NULL) {
*end = ++p;
}
*result = value;
return length;
}
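/*
 * Worked example (illustrative): the byte sequence 0x81 0x7F decodes as
 * ((0x81 & 0x7F) << 7) | (0x7F & 0x7F) = 0xFF = 255, consuming 2 bytes;
 * any single byte <= 0x7F is simply its own value, consuming 1 byte.
 */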
// The three helpers below sign-extend 8-, 16- and 32-bit two's-complement values
// (respectively) that are held in wider unsigned variables.
char to_signed_byte(unsigned char value) {
int signed_value = value;
if (value >> 7) signed_value |= -1 << 7;
return signed_value;
}
int to_signed_int( unsigned int value ) {
int signed_value = value;
if (value >> 15) signed_value |= -1 << 15;
return signed_value;
}
long int to_signed_long( unsigned long int value ) {
long int signed_value = value;
if (value >> 31) signed_value |= -1 << 31;
return signed_value;
}
uint64_t swap64(uint64_t x) {
uint8_t i;
uint64_t y ;
uint8_t *px, *py;
px = (uint8_t *)&x;
py = (uint8_t *)&y;
for (i=0; i<8; i++) {
*(py+i) = *(px +(7-i));
}
return y;
}
uint64_t ntohll(uint64_t value) {
return 1 == ntohl(1) ? value : swap64(value);
}
#define FL __FILE__,__LINE__
#define VERBOSE if (g->verbose)
#define DEBUG if (g->debug)
#define DECODE_MODE_FREESPACE 1
#define DECODE_MODE_NORMAL 0
#define PAYLOAD_SIZE_MINIMUM 10
#define PAYLOAD_CELLS_MAX 1000
#define OVERFLOW_PAGES_MAX 10000
#define PARAM_VERSION "--version"
#define PARAM_HELP "--help"
#define PARAM_FINE_SEARCH "--fine-search"
#define PARAM_FREESPACE_ONLY "--freespace"
#define PARAM_FREESPACE_MINIMUM "--freespace-minimum="
#define PARAM_NO_BLOBS "--no-blobs"
#define PARAM_BLOB_SIZE_LIMIT "--blob-size-limit="
#define PARAM_CELLCOUNT_MIN "--cellcount-min="
#define PARAM_CELLCOUNT_MAX "--cellcount-max="
#define PARAM_ROWSIZE_MIN "--rowsize-min="
#define PARAM_ROWSIZE_MAX "--rowsize-max="
#define PARAM_PAGE_SIZE "--page-size="
#define PARAM_PAGE_START "--page-start=" // added in 0.5
#define PARAM_PAGE_END "--page-end=" // added in 0.5
#define PARAM_REMOVED_ONLY "--removed-only"
struct globals {
uint8_t debug;
uint8_t verbose;
char *input_file; // actual file name
char *db_origin; // the mmap'd file origin
char *db_end; // the computed end of the mmap'd file ( based on file size )
char *db_cfp; // current file position
char *db_cpp; // current page position
char *db_cpp_limit; // end of the current page
size_t db_size;
uint32_t page_size, page_count, page_number;
uint32_t page_start, page_end;
uint32_t freelist_first_page, freelist_page_count;
uint32_t *freelist_pages;
uint32_t freelist_pages_current_index;
int freelist_space_only;
int removed_only;
size_t freespace_minimum;
time_t date_upper, date_lower; // deprecated - now that Undark has become a generic tool
int cc_min, cc_max; // cell count limits
size_t rs_min, rs_max; // row/payload limits
int report_blobs; // do we even handle blob data
size_t blob_size_limit; // at which point do we cut over to dumping to *.blob files?
int blob_count;
int fine_search;
};
struct cell {
int t; // serial
int o; // offset
int s; // size
};
struct sql_payload {
uint64_t prefix_length;
uint64_t length;
uint64_t rowid;
uint64_t header_size;
int cell_count;
int cell_page;
int cell_page_offset;
struct cell cells[PAYLOAD_CELLS_MAX+1];
uint32_t overflow_pages[OVERFLOW_PAGES_MAX+1];
char *mapped_data, *mapped_data_endpoint;
};
struct sqlite_leaf_header {
int page_number;
int page_byte;
uint16_t freeblock_offset;
uint16_t freeblock_size;
uint16_t freeblock_next;
int cellcount;
int cell_offset;
int freebytes;
};
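/*
 * The fields above follow the 8-byte header of a table b-tree leaf page in the
 * published SQLite file format: a page-type byte (0x0D for table leaves), a
 * 2-byte offset to the first freeblock, a 2-byte cell count, a 2-byte offset to
 * the start of the cell content area and one byte of fragmented free bytes,
 * plus bookkeeping for walking the freeblock chain.
 */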
char version[] = "undark version 0.8, origin by Paul L Daniels ( pldaniels@pldaniels.com )\n";
char help[] = "-i <sqlite DB> [-d] [-v] [-V|--version] [--cellcount-min=<count>] [--cellcount-max=<count>] [--rowsize-min=<bytes>] [--rowsize-max=<bytes>] [--no-blobs] [--blob-size-limit=<bytes>] [--page-size=<bytes>] [--page-start=<number>] [--page-end=<number>] [--freespace] [--freespace-minimum=<bytes>]\n"
"\t-i: input SQLite3 format database\n"
"\t-d: enable debugging output (very large dumps)\n"
"\t-v: enable verbose output\n"
"\t-V|--version: show version of software\n"
"\t-h|--help: show this help\n"
"\t--cellcount-min: define the minimum number of cells a row must have to be extracted\n"
"\t--cellcount-max: define the maximum number of cells a row must have to be extracted\n"
"\t--rowsize-min: define the minimum number of bytes a row must have to be extracted\n"
"\t--rowsize-max: define the maximum number of bytes a row must have to be extracted\n"
"\t--no-blobs: disable the dumping of blob data\n"
"\t--blob-size-limit: all blobs larger than this size are dumped to .blob files\n"
"\t--fine-search: search DB shifting one byte at a time, rather than records\n"
"\t--page-size: hard code the page size for the DB (useful when header is damaged)\n"
"\t--removed-only: Dumps rows that have their key set to -1\n"
"\t--freespace: search for rows in the freespace\n";
int UNDARK_init( struct globals *g ) {
g->page_size = 0;
g->page_count = 0;
g->page_number = 1;
g->debug = 0;
g->verbose = 0;
g->input_file = NULL;
g->date_lower = 0;
g->date_upper = 0;
g->cc_max = PAYLOAD_CELLS_MAX;
g->cc_min = 2;
g->rs_max = SIZE_MAX;
g->rs_min = 10;
g->blob_count = 0;
g->report_blobs = 1;
g->blob_size_limit = SIZE_MAX; // C99
g->fine_search = 0;
g->freelist_space_only = 0;
g->removed_only = 0;
g->freespace_minimum = SIZE_MAX; // C99
g->page_start = 0;
g->page_end = 0;
g->db_cfp = NULL;
g->db_cpp = NULL;
return 0;
}
int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) {
int param;
if (argc < 2) {
fprintf(stderr,"%s", help);
fprintf(stderr,"Sizeof double = %ld, long double = %ld\n", sizeof(double), sizeof(long double));
exit(1);
}
for (param = 1; param < argc; param++) {
char *p = argv[param];
if (strcmp(p, "-V") == 0) { fprintf(stdout,"%s", version); exit(0); }
if (strcmp(p, "-h") == 0) { fprintf(stdout,"%s %s", argv[0], help); exit(0); }
if (strcmp(p, "-d") == 0) g->debug = 1;
if (strcmp(p, "-v") == 0) g->verbose = 1;
if (strcmp(p, "-i") == 0) {
param++;
if (param < argc) {
g->input_file = argv[param];
} else {
fprintf(stderr,"Not enough paramters\n");
exit(1);
}
} else if (strncmp(p,"--", 2) == 0) {
DEBUG fprintf(stderr,"Parameter: '%s' %d\n", p, (int)strlen(PARAM_BLOB_SIZE_LIMIT));
// extended parameters
if (strncmp(p,PARAM_VERSION, strlen(PARAM_VERSION))==0) {
fprintf(stderr,"%s", version);
exit(0);
} else if (strncmp(p,PARAM_HELP, strlen(PARAM_HELP))==0) {
fprintf(stderr,"%s %s", argv[0], help);
exit(0);
} else if (strncmp(p,PARAM_NO_BLOBS, strlen(PARAM_NO_BLOBS))==0) {
g->report_blobs = 0;
} else if (strncmp(p,PARAM_BLOB_SIZE_LIMIT, strlen(PARAM_BLOB_SIZE_LIMIT))==0) {
p = p +strlen(PARAM_BLOB_SIZE_LIMIT);
g->blob_size_limit = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_PAGE_START, strlen(PARAM_PAGE_START))==0) {
p = p +strlen(PARAM_PAGE_START);
g->page_start = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_PAGE_END, strlen(PARAM_PAGE_END))==0) {
p = p +strlen(PARAM_PAGE_END);
g->page_end = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_PAGE_SIZE, strlen(PARAM_PAGE_SIZE))==0) {
p = p +strlen(PARAM_PAGE_SIZE);
g->page_size = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_FREESPACE_MINIMUM, strlen(PARAM_FREESPACE_MINIMUM))==0) {
p = p +strlen(PARAM_FREESPACE_MINIMUM);
g->freespace_minimum = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_CELLCOUNT_MIN, strlen(PARAM_CELLCOUNT_MIN))==0) {
p = p +strlen(PARAM_CELLCOUNT_MIN);
g->cc_min = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_CELLCOUNT_MAX, strlen(PARAM_CELLCOUNT_MAX))==0) {
p = p +strlen(PARAM_CELLCOUNT_MAX);
g->cc_max = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_ROWSIZE_MIN, strlen(PARAM_ROWSIZE_MIN))==0) {
p = p +strlen(PARAM_ROWSIZE_MIN);
g->rs_min = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_ROWSIZE_MAX, strlen(PARAM_ROWSIZE_MAX))==0) {
p = p +strlen(PARAM_ROWSIZE_MAX);
g->rs_max = strtol( p, NULL, 10 );
} else if (strncmp(p,PARAM_FINE_SEARCH, strlen(PARAM_FINE_SEARCH))==0) {
g->fine_search = 1;
} else if (strncmp(p,PARAM_FREESPACE_ONLY, strlen(PARAM_FREESPACE_ONLY))==0) {
g->freelist_space_only = 1;
} else if (strncmp(p,PARAM_REMOVED_ONLY, strlen(PARAM_REMOVED_ONLY))==0) {
g->removed_only = 1;
} else {
fprintf(stderr,"Cannot interpret extended parameter: \"%s\"\n",p);
exit(1);
}
}
}
if (g->input_file == NULL) {
fprintf(stderr,"ERROR: Need input file\n");
exit(1);
}
return 0;
}
int tdump( char *p, uint16_t l ) {
while (l--) {
if (isprint(*p)) fprintf(stdout,"%c", *p); else fprintf(stdout,".");
p++;
}
return 0;
}
// Dumps text in a SQL/CSV friendly format ( wraps the value in double quotes and doubles any embedded double quotes )
int sqltdump( char *p, uint16_t l ) {
fprintf(stdout,"\"");
while (l--) {
if (*p == '\"') fprintf(stdout,"\"");
if (isprint(*p)) fprintf(stdout,"%c", *p); else fprintf(stdout,".");
p++;
}
fprintf(stdout,"\"");
return 0;
}
int blob_dump( unsigned char *p, uint16_t l ) {
fprintf(stdout,"x'");
while (l--) {
fprintf(stdout,"%02X", ( unsigned char)*p);
p++;
}
fprintf(stdout,"'");
return 0;
}
// Combo hex + text dump, 16 byte wide rows
int hdump( unsigned char *p, uint16_t length, char *msg ) {
int oc = 0;
int ll = length;
fprintf(stdout,"%s: Hexdumping %d bytes from %p\n", msg, ll, p);
uint16_t c = 0;
if (p == NULL) {
fprintf(stdout,"ERROR: NULL passed.\n");
return 1;
}
while (ll > 0) {
int br;
unsigned char *op;
fprintf(stdout,"%04X [%06d] ", oc, ll);
oc+=16;
br = ll;
op = p;
while (ll--) {
fprintf(stdout, "%02X ", *p);
c++;
p++;
if (c%16 == 0) break;
}
ll = br;
p = op;
c = 0;
fprintf(stdout, " [%06d]", ll );
while (ll--) {
fprintf(stdout,"%c", isprint(*p)?*p:'.');
c++;
p++;
if (c%16 == 0) break;
}
fprintf(stdout," %d\n",ll);
}
fprintf(stdout,"\n");
return 0;
}
int blob_dump_to_file( struct globals *g, char *p, size_t l ) {
int f;
ssize_t written;
char fn[1024];
snprintf(fn, sizeof(fn), "%d.blob", g->blob_count);
DEBUG fprintf(stdout,"%s:%d:DEBUG: Writing %ld bytes to %s\n", FL , l, fn );
f = open(fn, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR );
if (!f) { fprintf(stderr,"Cannot open %s (%s)\n", fn, strerror(errno)); return 1; }
written = write(f, p, l);
if ( written != l ) {
fprintf(stderr,"Wrote %ld of %ld bytes to %s ( %s )\n", written, l, fn, strerror(errno));
close(f);
return 1;
}
close(f);
return 0;
}
// Searches for the needle in a haystack that may contain \0 delimited data.
char *bstrstr( char *haystack, char *needle, char *limit ) {
char *p;
if ((!needle)||(*needle == '\0')) return NULL;
if (!haystack) return NULL;
if ((limit == NULL)||(limit <=haystack)) return NULL;
p = haystack;
while (p < limit) {
char *tp;
char *tn;
tn = needle;
tp = p;
while ((*tn) && (tp < limit) && (*tp == *tn)) {
tn++;
tp++;
if (*tn == '\0') return p;
}
p++;
}
return NULL;
}
// Decodes the payload header data so that we can then later pull the actual data from the file.
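// In a table b-tree leaf cell, the layout this routine expects is:
//   [payload length varint][rowid varint][record header size varint]
//   [one serial-type varint per column][column data...]
// In freespace mode the two leading varints are skipped, because the 4-byte
// freeblock header has already supplied a length and there is no rowid to read.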
int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payload *payload, int mode, size_t forced_length ) {
int t = 0, offset;
char *plh_ep; // payload header end point
char *base = p;
DEBUG {
fprintf(stdout,"%s:%d:DEBUG:DECODING ROW-------------------------MODE:%s\n", FL, (mode?"Freespace":"Standard"));
hdump((unsigned char *)p, 16, "Decode_row start data");
}
payload->overflow_pages[0] = 0;
payload->cell_count = 0;
if ( mode == DECODE_MODE_FREESPACE ) {
payload->length = forced_length -4; // and we still have to deduct the payload header size
} else {
varint_decode( &(payload->length), p, &p );
}
if (payload->length > g->db_size) return 0;
if (payload->length < g->rs_min) return 0;
if (payload->length > g->rs_max) return 0;
DEBUG fprintf(stdout,"%s:%d:DEBUG:Payload size: %lu\n", FL, (unsigned long int)payload->length);
if (mode == DECODE_MODE_FREESPACE) {
payload->rowid = 1;
} else {
varint_decode(&(payload->rowid), p, &p);
}
if (payload->rowid < 1) return 0;
payload->prefix_length = p -base; // store this so we know how many bytes the length + Row ID took up.
plh_ep = p; // first set up the beginning of the payload header array size.
varint_decode( &(payload->header_size), p, &p );
if (payload->header_size > g->page_size) return 0;
if (mode == DECODE_MODE_FREESPACE) {
payload->length -= payload->header_size;
DEBUG fprintf(stdout,"%s:%d:DEBUG: Looking for %lu bytes of data after the payload header\n", FL , (long unsigned int)payload->length);
fflush(stdout);
}
// If the payload size exceeds the page_size, then we have to do some more checking
if (payload->length > (g->page_size -35)) {
uint32_t tmp, ovp;
int ovpi = 1;
// get the FIRST overflow page
memcpy(&tmp, data_endpoint -4, 4);
ovp = payload->overflow_pages[0] = ntohl(tmp);
// if the page is beyond the file range, then we've just got defective input data
if (ovp > g->page_count) return 0;
DEBUG fprintf(stdout,"%s:%d:DEBUG: First overflow page = %lu\n", FL , (long unsigned int)ovp);
DEBUG hdump((unsigned char *)(data_endpoint -16), 16, "First overflow page start data");
while (ovp > 0) {
void *calculated_address;
calculated_address = g->db_origin +( (ovp -1) *g->page_size);
DEBUG fprintf(stdout,"%s:%d:DEBUG: Calculated address: %p\n", FL, calculated_address);
// test for seeking beyond the db limit
//if ((g->db_origin +((ovp -1) *g->page_size)) > (g->db_end -4)) {
if ( calculated_address > (void *)(g->db_end -4)) { //PLD:20141220-0000
DEBUG fprintf(stdout,"%s:%d:ERROR: Seek beyond end of data looking for overflow page (%p > %p)\n", FL, calculated_address, g->db_end);
break;
}
if ( calculated_address < (void *)(g->db_origin)) { //PLD:20141220-0000
DEBUG fprintf(stdout,"%s:%d:ERROR: Seek before DB starts (%p < %p)\n", FL, calculated_address, g->db_origin);
break;
}
memcpy(&tmp, calculated_address, 4);
ovp = payload->overflow_pages[ovpi] = ntohl(tmp);
DEBUG fprintf(stdout,"%s:%d:DEBUG: overflow page[%d] = %d\n", FL , ovpi, ovp);
DEBUG fflush(stdout);
ovpi++;
if (ovpi > OVERFLOW_PAGES_MAX) {
fprintf(stdout,"ERROR: No more space for overflow pages\n");
fflush(stdout);
payload->overflow_pages[0] = 0;
break;
}
payload->overflow_pages[ovpi] = 0;
}
DEBUG {
fprintf(stdout,"DEBUG: Total of %d overflow pages\n",ovpi);
ovpi = 0;
while (payload->overflow_pages[ovpi]) {
fprintf(stdout,"DEBUG: Overflow %d->%d\n", ovpi, payload->overflow_pages[ovpi]);
ovpi++;
}
}
} // overflow handling
if (payload->header_size > g->page_size) return 0; // sorry, no can do with the way we're playing this decoding game.
if (payload->header_size < 2) return 0; // need at least 2 bytes
plh_ep += payload->header_size; // if we got a sane value, then we can use this for the full decode size ( includes the size of the first varint telling us the size )
DEBUG { fprintf(stdout,"[L:%lld][id:%lld][PLHz:%lld]",(long long int) payload->length, (long long int)payload->rowid, (long long int)payload->header_size); }
t = 0;
offset = 0;
while (1) {
uint64_t s;
int vil;
vil = varint_decode( &s, p, &p );
if (vil > 8) return 0; // no var int should be bigger than 8 bytes.
payload->cells[t].t = s; // set the type
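/*
 * The switch below maps SQLite serial types to content sizes: 0 = NULL,
 * 1-4 = big-endian integers of 1, 2, 3 and 4 bytes, 5 = 6 bytes, 6 = 8 bytes,
 * 7 = 8-byte IEEE float, 8/9 = the constants 0 and 1 (no stored bytes),
 * 10/11 = reserved, N >= 12 even = BLOB of (N-12)/2 bytes,
 * N >= 13 odd = TEXT of (N-13)/2 bytes.
 */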
switch (s) {
case 0: s = 0; break;
case 1: s = 1; break;
case 2: s = 2; break;
case 3: s = 3; break;
case 4: s = 4; break;
case 5: s = 6; break;
case 6: case 7: s = 8; break;
case 8: case 9: s = 0; break;
case 10: case 11: DEBUG fprintf(stdout,"%s:%d:DEBUG: celltype 10/11 reserved, aborting row.\n",FL); s = 0; return 0; break;
default:
if ((s >= 12) && ((s & 0x01) == 0)) {
payload->cells[t].t = 12;
s = (s - 12)/2;
} else if ((s >= 13) && ((s&0x01) == 1)) {
payload->cells[t].t = 13;
s = (s - 13)/2;
}
break;
}
payload->cells[t].s = s; // set the size/length
payload->cells[t].o = (plh_ep +offset) -base;
offset += payload->cells[t].s;
if (offset > payload->length) return 0;
DEBUG { fprintf(stdout,"[%d:%d:%d-%d(%ld)]", t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o, plh_ep -p ); }
if (p >= plh_ep) break;
t++;
payload->cell_count++;
if ( t > g->cc_max ) return 0;
} // while decoding the cells
if (p == plh_ep) {
DEBUG {
fprintf(stdout,"DEBUG: Payload head size match. (%ld =? %ld)\n ", p -base,plh_ep -base);
fprintf(stdout,"DEBUG: Data size by cell meta sum = %d\n ", offset );
}
} else {
DEBUG {
fprintf(stdout,"DEBUG: Payload scan end point, and predicted end point didn't match, difference %ld \n", p -plh_ep );
}
}
if ( t < g->cc_min ) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: cell count under the minimum, so aborting\n", FL );
return 0;
}
DEBUG fprintf(stdout,"Offset [%u] + headersize [%lu] = length check [%lu]... \n", offset, (unsigned long int)payload->header_size, (unsigned long int)payload->length);
if (mode == DECODE_MODE_FREESPACE) {
/** there can often be multiple entries within freespace, so we have to be
* a little looser with our acceptance criterion
*/
if (offset <= payload->length) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: FREESPACE SUBMATCH FOUND ( %u of %lu used )\n", FL , offset, (long unsigned int) payload->length);
return (offset +payload->header_size +4);
}
}
if (offset + payload->header_size == payload->length) {
DEBUG fprintf(stdout,"\nMATCH FOUND!\n");
return 1;
}
return 0;
}
int dump_row( struct globals *g, char *base, char *data_endpoint, struct sql_payload *payload, int mode ) {
int t = 0;
int ovpi;
void *addr;
DEBUG fprintf(stdout,"\n-DUMPING ROW------------------\n");
DEBUG hdump((unsigned char *)base, 16, "Dump_row starting data");
if ( payload->length > g->db_size ) {
DEBUG fprintf(stdout,"%s:%d:ERROR: Nonsensical payload length of %ld requested, ignoring.\n", FL, (long int)payload->length);
return -1;
}
if (payload->overflow_pages[0] == 0) {
payload->mapped_data = base;
payload->mapped_data_endpoint = data_endpoint;
} else {
payload->mapped_data = malloc( (payload->length +100) *sizeof(char) );
if ( !payload->mapped_data ) {
fprintf(stderr,"%s:%d:ERROR: Cannot allocate %ld bytes for mapped data\n", FL, (long int)payload->length +100);
return -1;
}
DEBUG fprintf(stdout,"ALLOCATED %d bytes to mapped data\n", (int)(payload->length +100) );
if (!payload->mapped_data){ fprintf(stderr,"ERROR: Cannot allocate %d bytes for payload\n", (int)(payload->length +1)); return 0; }
memset( payload->mapped_data, 'X', payload->length +1 );
// load in the first, default page.
DEBUG fprintf(stdout,"Copying data for initial page\n");
memcpy(payload->mapped_data, base, data_endpoint -base );
payload->mapped_data_endpoint = payload->mapped_data +(data_endpoint -base -4);
// DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data +4 );
// Load in the overflow pages (if any)
ovpi = 0;
while (payload->overflow_pages[ovpi]) {
DEBUG fprintf(stdout,"Copying data from file to memory for page %d to offset [%d]\n", payload->overflow_pages[ovpi], (int)(payload->mapped_data_endpoint -payload->mapped_data));
addr = g->db_origin +((payload->overflow_pages[ovpi]-1) *g->page_size) +4; //PLD:20141221-2240 segfault fix
if (( addr < (void *)g->db_origin) || ( addr+4 > (void *)g->db_end)) {
DEBUG fprintf(stdout,"%s:%d:dump_row:ERROR: page seek request outside of boundaries of file (%p < %p > %p)\n", FL, g->db_origin, addr, g->db_end);
return -1;
}
memcpy(payload->mapped_data_endpoint, addr, g->page_size -4);
payload->mapped_data_endpoint += g->page_size -4;
// DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data );
ovpi++;
}
}
DEBUG hdump((unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data, "Payload mapped data" );
if (mode == DECODE_MODE_FREESPACE) {
t = 0;
fprintf(stdout,"-1");
} else t = -1;
while (t <= payload->cell_count) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: Cell[%d], Type:%d, size:%d, offset:%d\n", FL , t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o);
if (t == -1) fprintf(stdout,"%lu", (long unsigned int) payload->rowid);
if (t>=0) { fprintf(stdout,",");
switch (payload->cells[t].t) {
case 0: fprintf(stdout,"NULL"); break;
case 1: fprintf(stdout,"x%d", to_signed_byte(*(payload->mapped_data +payload->cells[t].o)) ); break;
case 2: {
uint16_t n;
memcpy(&n, payload->mapped_data +payload->cells[t].o, 2 );
fprintf(stdout,"%d" , to_signed_int(ntohs(n)));
}
break;
case 3: { // 24-bit big-endian signed integer
unsigned char *b = (unsigned char *)(payload->mapped_data +payload->cells[t].o);
int32_t n = ((int32_t)b[0] << 16) | ((int32_t)b[1] << 8) | b[2];
if (n & 0x800000) n -= 0x1000000; // sign extend from 24 bits
fprintf(stdout,"%ld", (long int)n);
}
break;
case 4: {
uint32_t n;
memcpy(&n, payload->mapped_data +payload->cells[t].o, 4 );
fprintf(stdout,"%ld", to_signed_long(ntohl(n)));
}
break;
case 5: { // 48-bit big-endian signed integer
unsigned char *b = (unsigned char *)(payload->mapped_data +payload->cells[t].o);
int64_t n = 0;
int i;
for (i = 0; i < 6; i++) n = (n << 8) | b[i];
if (n & 0x800000000000LL) n -= 0x1000000000000LL; // sign extend from 48 bits
fprintf(stdout,"%lld", (long long int)n);
}
break;
case 6: { // 64-bit big-endian signed integer
uint64_t n;
memcpy(&n, payload->mapped_data +payload->cells[t].o, 8 );
fprintf(stdout,"%lld", (long long int)ntohll(n));
}
break;
case 7: { // 64-bit big-endian IEEE-754 double
uint64_t n;
double d;
memcpy(&n, payload->mapped_data +payload->cells[t].o, 8 );
n = ntohll(n);
memcpy(&d, &n, 8 ); // reinterpret the byte-swapped bits as a double
fprintf(stdout,"%f", d);
}
break;
case 8: fprintf(stdout,"0" ); break;
case 9: fprintf(stdout,"1" ); break;
case 12:
if ( g->report_blobs) {
if (payload->cells[t].s < g->blob_size_limit) {
DEBUG fprintf(stdout,"%s:%d:DEBUG:Not Dumping data to blob file, keeping in CSV\n", FL );
blob_dump((unsigned char *) (payload->mapped_data +payload->cells[t].o), payload->cells[t].s );
} else {
// dump the blob to a file.
DEBUG fprintf(stdout,"%s:%d:DEBUG:Dumping data to %d.blob [%d bytes]\n", FL ,g->blob_count, payload->cells[t].s);
blob_dump_to_file( g, (payload->mapped_data +payload->cells[t].o), payload->cells[t].s );
DEBUG fprintf(stdout,"\"%d.blob\"", g->blob_count);
}
}
g->blob_count++;
break;
case 13:
DEBUG fprintf(stdout,"%s:%d:DEBUG: Dumping text-13\n", FL );
sqltdump( payload->mapped_data +payload->cells[t].o, payload->cells[t].s );
break;
default:
fprintf(stderr,"Invalid cell type '%d'", payload->cells[t].t);
DEBUG fprintf(stdout,"%s:%d:DEBUG: Invalid cell type '%d'", FL, payload->cells[t].t);
DEBUG hdump( (unsigned char *) base, 128, "Invalid cell type" );
return 0;
break;
} // switch cell type
}
t++;
} // while decoding the cells
fprintf(stdout,"\n");
fflush(stdout);
if (payload->overflow_pages[0] != 0) {
free( payload->mapped_data );
}
return 0;
}
// Finds rows within a block.
char *find_next_row( struct globals *g, char *s, char *end_point, char *global_start, int mode, size_t forced_length ) {
char *p;
struct sql_payload sql;
DEBUG fprintf(stdout,"find_next_row: MODE: %d\n", mode );
if (s == NULL) fprintf(stdout,"ERROR: NULL passed as search-space parameter\n");
p = s;
do {
int row;
row = decode_row( g, p, end_point, &sql, mode, forced_length );
if (row) {
DEBUG fprintf(stdout,"ROWID: %ld found [+%ld] record size: %d bytes\n", (unsigned long int)sql.rowid, p -global_start, (unsigned int)( sql.length+sql.prefix_length ));
fflush(stdout);
/** If we're only wanting the removed, no-key-value rows, then
* continue to the next row
*/
if ((g->removed_only)&&(row >= 0)) {
p++;
continue;
}
if ((mode == DECODE_MODE_NORMAL)&&( g->freelist_space_only == 1)) {
// do nothing
} else {
dump_row( g, p, end_point, &sql, mode );
}
fflush(stdout);
if (mode == DECODE_MODE_NORMAL) {
if (g->fine_search) p++;
else p+= sql.length;
} else {
if (row >= forced_length) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: No more data left in freespace block to examine\n", FL);
p = end_point;
break;
} else {
p+=row; forced_length -= row;
DEBUG hdump((unsigned char *)p,64, "After freespace decode");
}
}
} else {
p++;
}
} while (p < end_point -PAYLOAD_SIZE_MINIMUM);
return NULL;
}
int main( int argc, char **argv ) {
int fd;
struct globals globo, *g;
struct stat st;
char *p;
int stat_result;
g = &globo;
UNDARK_init( g );
UNDARK_parse_parameters( argc, argv, g );
stat_result = stat( g->input_file, &st );
if (stat_result != 0) {
fprintf(stderr,"ERROR: Cannot access input file '%s' ( %s )\n", g->input_file, strerror(errno));
exit(1);
}
fd = open( g->input_file, O_RDONLY );
if (fd < 0) { fprintf(stderr,"ERROR: Cannot open input file '%s' ( %s )\n", g->input_file, strerror(errno)); exit(1); }
g->db_size = st.st_size;
g->db_origin = mmap( NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0 );
if (g->db_origin == MAP_FAILED) { fprintf(stderr,"ERROR: Cannot mmap input file '%s' ( %s )\n", g->input_file, strerror(errno)); exit(1); }
g->db_end = g->db_origin +st.st_size -1;
//fprintf(stderr,"DB origin: %p\nDB end: %p\n", g->db_origin, g->db_end );
// If the page size is already set via parameter, then skip
if (g->page_size == 0) {
unsigned char *pp = (unsigned char *)(g->db_origin +16);
g->page_size = (pp[0] << 8) | pp[1]; // 2-byte big-endian page size at header offset 16
if (g->page_size == 1) g->page_size = 65536; // a stored value of 1 means 64 KiB pages
}
// Get the number of pages that are supposed to be in the database
p = g->db_origin +28;
memcpy( &g->page_count, g->db_origin +28, 4 ); // copy the page count from the header
g->page_count = ntohl( g->page_count ); // convert to local format
DEBUG fprintf(stdout,"Pagesize: %u, Pagecount: %u\n", g->page_size, g->page_count);
// Get the free list meta data
memcpy( &g->freelist_first_page, g->db_origin +32, 4 ); // first freelist trunk page number from the header
g->freelist_first_page = ntohl( g->freelist_first_page );
DEBUG fprintf(stdout,"First page of freelist trunk: %d\n", g->freelist_first_page );
memcpy(&g->freelist_page_count, g->db_origin +36, 4); // total freelist page count from the header
g->freelist_page_count = ntohl( g->freelist_page_count );
DEBUG fprintf(stdout,"Freelist page count: %d\n", g->freelist_page_count );
// Get the actual free list pages
if (0) {
if (g->freelist_page_count) {
g->freelist_pages = malloc( (g->freelist_page_count +1) * sizeof(uint32_t) );
if (!g->freelist_pages) {
fprintf(stderr,"ERROR: Cannot allocate memory to build page free list\n");
exit(1);
} else {
uint32_t next_page;
uint32_t pli;
next_page = g->freelist_first_page;
g->freelist_pages[0] = next_page;
g->freelist_pages[1] = 0;
pli = 1;
if ( pli < g->freelist_page_count ) {
do {
uint32_t tmp_page, leaf_page_count;
char *fp, *current_page_endpoint;
uint32_t jump;
jump = ((next_page-2) *g->page_size);
fp = g->db_origin +jump;
current_page_endpoint = fp +g->page_size;
fprintf(stdout,"Freelist - current trunk page = %d [ offset: %X ]\n", next_page, jump);
hdump((unsigned char*)fp, g->page_size, "Current trunk page");
DEBUG fflush(stdout);
memcpy( &tmp_page, fp, sizeof(uint32_t));
tmp_page = ntohl(tmp_page);
fp += sizeof(uint32_t);
DEBUG fprintf(stdout,"Next trunk page (if any): %d\n",tmp_page);
DEBUG fflush(stdout);
memcpy( &leaf_page_count, fp, sizeof(uint32_t));
leaf_page_count = ntohl(leaf_page_count);
fp += sizeof(uint32_t);
DEBUG fprintf(stdout,"Leaf page count: %d\n",leaf_page_count);
DEBUG fflush(stdout);
//while ((pli <= g->freelist_page_count)&&( fp < current_page_endpoint )) {
while (( fp < current_page_endpoint )&&( leaf_page_count-- )) {
hdump((unsigned char*)fp, 16, "Next free page possible");
memcpy( &(g->freelist_pages[pli]), fp, sizeof(uint32_t));
g->freelist_pages[pli] = ntohl( g->freelist_pages[pli] );
DEBUG fprintf(stdout, "Next free page[%d]: %d\n", pli, g->freelist_pages[pli]);
if (g->freelist_pages[pli] == 0) {
fprintf(stdout,"End of freelist detected\n");
fflush(stdout);
break;
}
fflush(stdout);
pli++;
fp+= sizeof(uint32_t);
}
next_page = tmp_page;
} while (next_page > 0);
fprintf(stdout,"Freepages - END\n");
fflush(stdout);
}
} // if there were more than one page
}
}
g->db_cfp = g->db_cpp = g->db_origin;
DEBUG fprintf(stdout,"%s:%d:DEBUG: Commence decoding data\n", FL );
fflush(stdout);
while (g->db_cfp < g->db_end ) {
struct sqlite_leaf_header leaf;
int freeblock_mode = 0;
/* load the next page from the file in to the scratch pad */
g->db_cfp = g->db_cpp;
g->db_cpp_limit = g->db_cpp +g->page_size ; // was -1 ?
DEBUG fprintf(stdout,"\n\n%s:%d:-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=START.\n", FL);
// process the block, mostly this is just removing any 0-bytes from the block
// so our strstr() calls aren't prematurely terminated.
DEBUG {
char *p;
size_t l;
int bc = 0;
fprintf(stdout,"%s:%d:Dumping main block in RAW... [ Page No: %lu, Offset: %lu (0x%X), size : %d ]\n"
, FL
, (long unsigned int)g->page_number
, (long unsigned int)(g->db_cpp -g->db_origin)
, (unsigned int)(g->db_cpp -g->db_origin)
, g->page_size
);
p = g->db_cfp;
l = g->page_size;
while ((l--)&&(p)) {
{ if (isprint(*p)) { fprintf(stdout,"%c", *p); } else fprintf(stdout,"_");}
p++;
bc++;
if (bc%128 == 0) fprintf(stdout,"\n");
}
fprintf(stdout,"\n");
fflush(stdout);
} // debug
leaf.freeblock_offset = 0;
leaf.freeblock_size = 0;
leaf.freeblock_next = 0;
leaf.page_number = g->page_number;
/* Decode the page header */
if ((g->db_cfp < g->db_end) && *(g->db_cfp) == 13) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: Decoding page header for page %d\n", FL , g->page_number );
fflush(stdout);
leaf.page_byte = 13;
/**
* Get freeblock offset and determine if we have a free block in this
* page that needs to be inspected. This is one of the more commonly
* needed parts of data for our row recovery
*
*/
memcpy( &(leaf.freeblock_offset), (g->db_cfp +1), 2 );
leaf.freeblock_offset = ntohs( leaf.freeblock_offset );
if (leaf.freeblock_offset > 0) {
uint16_t next, sz, off;
freeblock_mode = 1;
off = leaf.freeblock_offset;
DEBUG fprintf(stdout,"%s:%d:DEBUG: FREEBLOCK mode ON: header decode [offset=%u]\n", FL , leaf.freeblock_offset);
do {
DEBUG hdump((unsigned char *)(g->db_cfp +off), 16, "Freeblock header data");
memcpy( &next, ( g->db_cfp +off ), 2 );
next = ntohs( next );
memcpy( &sz, ( g->db_cfp +off +2 ), 2 );
sz = ntohs( sz );
DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock size = %u, next position = %u\n", FL, sz, next );
if (next) off = next;
} while (next);
DEBUG fprintf(stdout,"%s:%d:DEBUG: END OF FREEBLOCK TRACE\n", FL);
memcpy( &(leaf.freeblock_next), ( g->db_cfp +leaf.freeblock_offset ), 2 );
leaf.freeblock_next = ntohs( leaf.freeblock_next );
memcpy( &(leaf.freeblock_size), ( g->db_cfp +leaf.freeblock_offset +2 ), 2 );
leaf.freeblock_size = ntohs( leaf.freeblock_size );
}
DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock offset = %u, size = %u, next block = %u \n", FL , leaf.freeblock_offset, leaf.freeblock_size, leaf.freeblock_next );
if (leaf.freeblock_size > 0) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock data [ %d bytes total [4 bytes for header] ]\n", FL, leaf.freeblock_size );
DEBUG hdump( (unsigned char *)(g->db_cfp +leaf.freeblock_offset+4), leaf.freeblock_size-4, "Actual data in free block" );
}
fflush(stdout);
// leaf.freeblock_offset = ntohs( ta );
{
uint16_t tmp16;
memcpy( &tmp16, g->db_cfp +3, 2 ); leaf.cellcount = ntohs(tmp16); // 2-byte big-endian cell count
memcpy( &tmp16, g->db_cfp +5, 2 ); leaf.cell_offset = ntohs(tmp16); // 2-byte big-endian offset of the cell content area
}
leaf.freebytes = (unsigned char)(*(g->db_cfp +7));
DEBUG fprintf(stdout,"%s:%d:DEBUG: PAGEHEADER:%d pagebyte: %d, freeblock offset: %d, cell count: %d, first cell offset %d, free bytes %d\n", FL
, leaf.page_number
, leaf.page_byte
, leaf.freeblock_offset
, leaf.cellcount
, leaf.cell_offset
, leaf.freebytes
);
/**
* If we're wanting free block sourced data, then simply jump
* to the start of the free block space and commence the searching
* in the next section ( find_next_row ).
*
* After this the g->db_cfp pointer should be sitting on the first
* varint of the payload header which defines the header length
* (inclusive)
*
* Detecting rows in the freeblocks is done differently to the
* normal data, so
*
*/
if (g->freelist_space_only) {
if ((leaf.freeblock_offset > 0) && (leaf.freeblock_size > 0)) {
DEBUG fprintf(stdout,"%s:%d:DEBUG: Shifting to freespace at %d from page start\n", FL , leaf.freeblock_offset);
g->db_cfp = g->db_cfp + leaf.freeblock_offset +4;
DEBUG fprintf(stdout,"%s:%d:DEBUG: New position = %p\n", FL , g->db_cfp);
DEBUG hdump((unsigned char *)g->db_cfp -4,32, "Scratch pointer at freespace data start (including 4 byte header)");
DEBUG fflush(stdout);
}
}
fflush(stdout);
} // if we have a leaf page, which we can decode the header on.
//if ((leaf.page_byte == 13)) {
if (1) {
char *row;
row = g->db_cfp;
DEBUG fprintf(stdout,"%s:%d:DEBUG: g->db_cfp search at = %p\n", FL , g->db_cfp);
do {
if ((row > g->db_origin)&&(row < g->db_end)) {
row = find_next_row( g, row, g->db_cpp_limit, g->db_cfp, freeblock_mode, leaf.freeblock_size );
//if (row > g->db_end) fprintf(stdout,"ERROR: beyond end point\n");
if (row > g->db_cpp_limit) fprintf(stdout,"ERROR: beyond end point\n");
if (row < g->db_cfp) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row location not in g->db_cfp page\n", FL );
if (row == NULL) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row has been returned as NULL\n", FL );
DEBUG fprintf(stdout,"%s:%d:DEBUG: ROW found at offset: %ld\n", FL, row-g->db_cfp);
} else {
break;
}
} while (row && (row < g->db_cpp_limit ));
//} while (row && (row < g->db_cpp_limit ) && (row < g->db_end) );
DEBUG fprintf(stdout,"%s:%d:DEBUG: Finished searching for rows in DB page %d\n", FL , g->page_number);
}
g->db_cpp += g->page_size;
g->page_number++;
} // while (data < endpoint)
close(fd);
return 0;
}