Skip to content

Instantly share code, notes, and snippets.

@pedramamini
Last active March 16, 2019 15:34
Show Gist options
  • Save pedramamini/54df2648a1b73adf9a0d6d0b1a75ca0a to your computer and use it in GitHub Desktop.
Save pedramamini/54df2648a1b73adf9a0d6d0b1a75ca0a to your computer and use it in GitHub Desktop.
Carve files out of a blob.
#!/usr/bin/env python
# source: https://gist.github.com/pedramamini/54df2648a1b73adf9a0d6d0b1a75ca0a
import os
import re
import sys
import errno
import string
# verbose diagnostics are off unless switched on (the script entry point does so for "-v").
DEBUG = False

# positions of the fields inside each CARVE_PAIRINGS value.
START, END, EXTENSION = 0, 1, 2

# table of carvable types, keyed by label.
#   value layout: [start marker, end marker, file extension]
#   - markers are regular expressions, not plain strings.
#   - an absent end marker is written as "" (never None).
CARVE_PAIRINGS = {
    "JPG-NORMAL": ["\xff\xd8\xff(\xe0|\xe1|\xfe)", "\xff\xd9", "jpg"],
    "JPG-WIDE":   ["\xff\xd8\xff", "\xff\xd9", "jpg"],
    "JPG-END":    ["\xff\xd8\xff", "", "jpg"],
    "PNG":        ["\x89PNG", "", "png"],
    "GIF":        ["GIF8(9|7)a", "", "gif"],
    "CDF-OLE":    ["\xD0\xCF\x11\xE0", "", "ole"],
    "ZIP":        ["PK\x03\x04", "", "zip"],
}
########################################################################################################################
def usage(msg=None):
    """
    Write the command-line help to stderr and terminate the process.

    The help lists every entry of CARVE_PAIRINGS with its start and end
    markers rendered through hexify() with printable characters preserved.
    An entry whose end marker is empty is shown as "None".

    :param msg: optional error message, written after the help text.

    Never returns: always finishes with sys.exit(1).
    """
    sys.stderr.write("USAGE: %s </path/to/input/file> [</path/to/output/directory>] [-v]\n" % sys.argv[0])
    sys.stderr.write("\nCarve start/end marker pairings out of a file. Supported types:\n")

    # .items() rather than .iteritems(): same iteration on Python 2, and it
    # does not raise AttributeError on Python 3.
    for label, marker in CARVE_PAIRINGS.items():
        start_text = hexify(marker[START], True)
        end_text = hexify(marker[END], True) if marker[END] else None
        sys.stderr.write("%20s start: %-20s end:%5s\n" % (label, start_text, end_text))

    if msg:
        sys.stderr.write("\nError: %s\n" % msg)

    sys.exit(1)
########################################################################################################################
def mkdir_p(path):
    """
    Create directory `path` together with any missing parents (like `mkdir -p`).

    An already existing directory is not an error.

    :param path: directory to create.
    :raises OSError: if creation fails for any reason other than the
                     directory already existing, including the case where
                     `path` exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is also reported when `path` is a regular file; only swallow
        # the error when a directory is really there, otherwise the caller
        # would carry on with an unusable output location.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
########################################################################################################################
def hexify(s, preserve_printables=False):
    """
    Render string `s` as lowercase two-digit hex, one pair per character.

    :param s: string to render.
    :param preserve_printables: when true, characters found in
        string.printable are copied through unchanged instead of being
        converted to hex.
    :return: the rendered string ("" for empty input).
    """
    def render(ch):
        # pass printable characters through only when the caller asked for it.
        if preserve_printables and ch in string.printable:
            return ch
        return "%02x" % ord(ch)

    return "".join(render(ch) for ch in s)
########################################################################################################################
def hex_dump(data, addr=0, prefix=""):
    """
    Format `data` as a hex dump: up to 16 bytes per row, each row followed by
    an ASCII rendering in which bytes outside 32..126 are shown as ".".

    The returned text starts with `prefix` followed by a space, so it can be
    appended to a caption; every row then begins on a new line as
    "<prefix><address as 4 hex digits>: ". A row header is only emitted when
    the running address is a multiple of 16.

    :param data: string of bytes to dump.
    :param addr: address assigned to the first byte.
    :param prefix: text placed at the start of the output and of every row.
    :return: the formatted dump, terminated by a newline.
    """
    def to_ascii(chunk):
        return "".join(c if 32 <= ord(c) <= 126 else "." for c in chunk)

    dump = prefix
    row = ""    # raw bytes of the row currently being built.

    for byte in data:
        if addr % 16 == 0:
            # close the previous row with its ASCII column, then open a new one.
            dump += " " + to_ascii(row)
            dump += "\n%s%04x: " % (prefix, addr)
            row = ""

        dump += "%02x " % ord(byte)
        row += byte
        addr += 1

    if row:
        # every byte occupies three columns ("xx "), so pad the final row by
        # three columns per missing byte and always add the separator; the
        # ASCII column then lines up with the rows above, whether the final
        # row is short or full.
        dump += "   " * (16 - len(row)) + " "

    return dump + to_ascii(row) + "\n"
########################################################################################################################
def find_all(needle, haystack, include_marker=False):
    """
    Locate every non-overlapping match of the regular expression `needle`
    in `haystack`.

    :param needle: regular expression pattern to search for.
    :param haystack: data to search.
    :param include_marker: when false, report the offset where each match
        starts; when true, report the offset just past the end of each match.
    :return: list of offsets in the order the matches were found.
    """
    indexes = []

    for match in re.finditer(needle, haystack):
        # use the extent of the actual match: the length of the pattern text
        # differs from the number of bytes matched as soon as the pattern
        # contains groups, alternation or escapes.
        found = match.end() if include_marker else match.start()
        indexes.append(found)

        if DEBUG:
            print("found needle '%s' in haystack at offset %04x" % (hexify(needle), found))

    return indexes
########################################################################################################################
def commify(number):
    """
    Return str(number) with a comma inserted between each group of three
    digits of its leading run of digits (an optional leading "-" is kept).

    :param number: value to format; it is converted with str() first.
    :return: the comma-grouped string.
    """
    text = str(number)
    grouper = re.compile(r"^(-?\d+)(\d{3})")

    # each pass splits off the right-most ungrouped triple of the leading
    # digit run; stop once a pass substitutes nothing.
    while True:
        text, replaced = grouper.subn(r"\1,\2", text)
        if not replaced:
            return text
########################################################################################################################
def carve_helper(starters, enders, data, output_dir, marker_kind):
    """
    Write one file for every (start, end) offset pair where start < end.

    Each file holds data[start:end] and is named
    "<marker_kind>-<start position>-<end position>.<extension>", where the
    positions are the indexes of the offsets within `starters` / `enders`
    and the extension comes from CARVE_PAIRINGS[marker_kind].

    :param starters: list of start offsets into `data`.
    :param enders: list of end offsets into `data` (exclusive slice bound).
    :param data: the blob being carved.
    :param output_dir: directory the carved files are written to.
    :param marker_kind: key into CARVE_PAIRINGS.
    """
    extension = CARVE_PAIRINGS[marker_kind][EXTENSION]

    # enumerate() supplies the positions directly; looking them up with
    # list.index() rescans the list for every pair and would misnumber
    # repeated offsets.
    for start_pos, s in enumerate(starters):
        for end_pos, e in enumerate(enders):
            # start must be before end.
            if not s < e:
                continue

            # carvings are tagged by their type followed by their start/end index.
            carving_name = "%s-%02d-%02d.%s" % (marker_kind, start_pos, end_pos, extension)
            carving_path = os.path.join(output_dir, carving_name)
            carved = data[s:e]

            print("writing %s bytes from %08x to %08x to %s" % (commify(len(carved)), s, e, carving_path))

            with open(carving_path, "wb") as fh:
                fh.write(carved)
########################################################################################################################
def carve_all(path=None, data=None, output_dir=None):
    """
    Carve every type listed in CARVE_PAIRINGS out of a blob.

    For each type the start markers are located with find_all(); the end
    offsets are the ends of the end-marker matches or, when the type has no
    end marker, the end of the data. The offset lists are then handed to
    carve_helper(), which writes the files.

    :param path: file to read the blob from; when given it takes precedence
        over `data`.
    :param data: blob to carve when no `path` is given.
    :param output_dir: directory for the carved files. When omitted, the
        module-level `output_dir` set by the script entry point is used if it
        exists, otherwise "./".
    :raises Exception: if neither `path` nor `data` is supplied.
    """
    if path is None and data is None:
        raise Exception("carve_all() either 'path' or 'data' optargs must be specified.")

    if path:
        # read the file named by the argument, not a global that only exists
        # when this module runs as a script.
        with open(path, "rb") as fh:
            data = fh.read()

    if output_dir is None:
        # backward compatible: existing callers rely on the module global.
        output_dir = globals().get("output_dir", "./")

    # walk the carving pairing datastructure and carve away.
    for kind, marker in CARVE_PAIRINGS.items():
        print(">>>>> carving for %s" % kind)

        # there's always a start.
        starters = find_all(marker[START], data)

        # use end of file, if no end marker is defined.
        if marker[END]:
            enders = find_all(marker[END], data, include_marker=True)
        else:
            enders = [len(data)]

        if DEBUG:
            print("\n>>> %s START MARKERS (dump from marker)" % kind)
            for index in starters:
                print("%s %s %s" % (index, hex(index), hex_dump(data[index : index + 64])))

            print("\n>>> %s END MARKERS (dump to marker)" % kind)
            for index in enders:
                # clamp at 0: a negative slice start would wrap around to the
                # end of the data and dump the wrong bytes (or nothing).
                dump_from = max(0, index - 64 - len(marker[END]))
                dump_to = index + len(marker[END])
                print("%s %s %s" % (index, hex(index), hex_dump(data[dump_from : dump_to])))

        # carve out the markers.
        carve_helper(starters, enders, data, output_dir, kind)
########################################################################################################################
if __name__ == "__main__":
    # "-v" anywhere on the command line switches on verbose output.
    if "-v" in sys.argv:
        sys.argv.remove("-v")
        DEBUG = True

    # accepted forms: <input file>  or  <input file> <output directory>.
    argc = len(sys.argv)

    if argc not in (2, 3):
        usage()

    # arguments are consumed from the end: output directory first (when
    # present, otherwise the current directory), then the input file.
    output_dir = sys.argv.pop() if argc == 3 else "./"
    input_file = sys.argv.pop()

    # the input must exist; the output directory is created on demand.
    if not os.path.exists(input_file):
        usage("file not found: %s" % input_file)

    mkdir_p(output_dir)

    # call the carver.
    carve_all(path=input_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment