Skip to content

Instantly share code, notes, and snippets.

@saml
Forked from egguy/mongo_db_recover_delete_record.py
Last active December 25, 2015 19:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saml/7030714 to your computer and use it in GitHub Desktop.
Save saml/7030714 to your computer and use it in GitHub Desktop.
"""A little script to recover deleted recording of a mongoDB db file
There is no optimization, but it works and has saved me.
"""
import struct
import bson
import sys
def decode_chunck(chunck):
    """Decode a candidate chunk and keep it only if it looks like a wanted record.

    The chunk is handed to ``bson.decode_all`` and the first decoded
    document is inspected. It is returned only when it is non-empty and
    contains both the 'path' and 'storageId' keys. Any failure while
    decoding or inspecting the chunk yields ``None``.
    """
    try:
        document = bson.decode_all(chunck)[0]
        # Only documents carrying every searched field are worth recovering.
        if document and 'path' in document and 'storageId' in document:
            return document
    except Exception:
        # Candidates are arbitrary byte ranges, so failing to decode is the
        # normal case rather than an error worth reporting.
        pass
    return None
def generate_chunck(data, pos=0):
    """Yield every byte range of a file that could be a BSON document.

    The whole file is read into memory and scanned one byte at a time.
    At each offset the next four bytes are read as a little-endian
    unsigned length; the span of that length is yielded when it fits in
    the file and its last byte is NUL. Candidates may overlap and are not
    guaranteed to be valid BSON, so the caller must still try to decode
    them.

    Args:
        data: Path of the file to scan.
        pos: Byte offset at which to start scanning.

    Yields:
        Byte strings, each one a candidate BSON document.
    """
    with open(data, 'rb') as f:
        a = f.read()
    size = len(a)
    # Stop once fewer than four bytes remain: a length prefix can no longer
    # be read there, and unpacking a short tail raises struct.error.
    last_start = size - 4
    while pos <= last_start:
        # Determine the size of the possible bson encoded data
        bson_size = struct.unpack_from("<I", a, pos)[0]
        end = pos + bson_size
        # The smallest document is 5 bytes (length prefix plus terminator),
        # the candidate must fit inside the file, and the declared length
        # includes the trailing \x00, which is therefore at end - 1.
        # http://bsonspec.org/#/specification
        # A one-byte slice is compared so the check behaves the same whether
        # indexing the buffer gives an int or a one-character string.
        if bson_size >= 5 and end <= size and a[end - 1:end] == b'\x00':
            yield a[pos:end]
        # Always advance by a single byte: candidates may overlap.
        pos += 1
# Usage: $0 <db file> [start offset]
# argv[1] = the file to recover
# argv[2] = where to start in the file, as a byte offset (optional, default 0)
start_offset = int(sys.argv[2]) if len(sys.argv) > 2 else 0
for chunck in generate_chunck(sys.argv[1], start_offset):
    result = decode_chunck(chunck)
    # decode_chunck returns None for anything that is not a wanted record.
    if result:
        print(result)
import sys
import json
class Parser(object):
    """Line-oriented state machine that pairs 'path' and 'storageId' values.

    The input is read one line at a time. A line equal to 'path' announces
    that the next line is a path value; after that, a line equal to
    'storageId' announces that the next line is the storage id. Each
    completed pair is written to the output as one JSON object per line.
    A 'path' marker seen before the pair is complete discards the partial
    asset and starts over.
    """

    def __init__(self, stream):
        """Remember the iterable of lines to parse; parsing begins in start()."""
        self.stream = stream
        # Bound method handling the next line; set by start().
        self.current_state = None
        # Asset being assembled, or None when nothing is in progress.
        self.asset = None

    def _start(self, line):
        """Idle state: wait for a 'path' marker and ignore everything else."""
        if line == 'path':
            self.current_state = self._path_begin

    def _back_to_path_begin(self, from_state):
        """Report and drop the partial asset, then expect a path value next."""
        print('%s -> _path_begin: %s' % (from_state, self.asset))
        self.asset = None
        self.current_state = self._path_begin

    def _path_begin(self, line):
        """Take the line as the path value of a new asset."""
        if line == 'path':
            return self._back_to_path_begin('_path_begin')
        self.asset = {'path': line}
        self.current_state = self._path_end

    def _path_end(self, line):
        """Skip lines until the 'storageId' marker appears."""
        if line == 'path':
            return self._back_to_path_begin('_path_end')
        if line == 'storageId':
            self.current_state = self._storageId_begin

    def _storageId_begin(self, line):
        """Take the line as the storage id and return the completed asset."""
        if line == 'path':
            return self._back_to_path_begin('_storageId_begin')
        self.asset['storageId'] = line
        self.current_state = self._start
        return self.asset

    def start(self, output):
        """Parse the whole stream, writing each completed asset to output.

        Args:
            output: Object with a write() method; receives one JSON document
                per line.
        """
        self.current_state = self._start
        for raw_line in self.stream:
            # Drop only a real trailing newline: the final line of the input
            # may not have one, and blindly slicing off the last character
            # would truncate its value.
            line = raw_line[:-1] if raw_line.endswith('\n') else raw_line
            result = self.current_state(line)
            if result is not None:
                output.write(json.dumps(result))
                output.write("\n")
# Usage: $0 output.json
# Lines are read from standard input; one JSON object per recovered asset
# is written to the file named by the first argument.
output_path = sys.argv[1]
with open(output_path, 'w') as output:
    parser = Parser(sys.stdin)
    parser.start(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment