# 9b/compare_pdf.py

## compare_pdf.py
# Script metadata. __description__ and __version__ are also used by main()
# to build the command-line usage and version text.
__description__ = 'Analyzes Malicious PDF Object in Memory'
__author__ = 'Brandon Dixon'
__version__ = '1.0'
__date__ = '2011/11/07'

from object_builder import *
import simplejson as json
import optparse
import malobjclass
import codecs

#user defined actions here
def user_land(pdf):
	"""Print hash information for one parsed PDF or for a list of them.

	Single object: prints one "<id>,<encoded hash>" line for every object
	whose encoded stream is present.

	List of objects: prints the MD5 and object count of each file, then the
	encoded-stream hashes and the raw hashes that occur in every file of the
	list (any number of files, not just three). Each shared hash is printed
	with the ID of the matching object in the last file of the list. Shared
	hashes are printed in sorted order so the output is deterministic. An
	empty list prints nothing.

	pdf -- an object exposing `objs`, `file_md5` and `get_object_by_hash`,
	       or a list of such objects
	"""
	if not isinstance(pdf, list):
		for obj in pdf.objs:
			if obj.stream_encoded_stream is not None:
				print(str(obj.id) + "," + obj.stream_encoded_hash)
		return

	if not pdf:
		# Nothing to compare; intersecting zero sets is undefined.
		return

	encoded_sets = []
	raw_sets = []
	for p in pdf:
		print("[+] File Data")
		print("\t[+] MD5 " + p.file_md5)
		print("\t[+] Object Count " + str(len(p.objs)))

		encoded_sets.append(set(
			obj.stream_encoded_hash
			for obj in p.objs
			if obj.stream_encoded_stream is not None
		))
		raw_sets.append(set(obj.raw_hash for obj in p.objs))

	# IDs are resolved against the last file; every shared hash is in it
	# because the intersection covers all files.
	reference = pdf[-1]

	matching_hashes = set.intersection(*encoded_sets)
	print("[+] The following " + str(len(matching_hashes)) + " encoded stream hashes are shared")
	for match in sorted(matching_hashes):
		obj = reference.get_object_by_hash(match, "encoded")
		print(match + " ID: " + str(obj.id))

	matching_raw = set.intersection(*raw_sets)
	print("[+] The following " + str(len(matching_raw)) + " raw hashes are shared")
	for match in sorted(matching_raw):
		obj = reference.get_object_by_hash(match, "raw")
		print(match + " ID: " + str(obj.id))

def main():
	"""Parse the command line and analyze one PDF or a directory of PDFs.

	-f/--file: build an object from that file and pass it to user_land().
	-d/--dir:  build one object per directory entry, in sorted name order,
	           and pass the resulting list to user_land().
	With neither option the usage help is printed.
	"""
	# `os` is not imported at the top of this file, so bring it into scope here.
	import os

	oParser = optparse.OptionParser(usage='usage: %prog [options]\n' + __description__, version='%prog ' + __version__)
	oParser.add_option('-f', '--file', default='', type='string', help='file to build an object from')
	oParser.add_option('-d', '--dir', default='', type='string', help='dir to build an object from')
	(options, args) = oParser.parse_args()

	if options.file:
		output = build_obj(options.file)
		pdf = malobjclass.jPdf(json.loads(output))
		user_land(pdf)
	elif options.dir:
		pdfs = []
		for fname in sorted(os.listdir(options.dir)):
			print("[+] Analyzing file " + fname)
			# os.path.join adds the separator when the directory argument
			# has no trailing slash; plain concatenation did not.
			output = build_obj(os.path.join(options.dir, fname))
			pdfs.append(malobjclass.jPdf(json.loads(output)))

		user_land(pdfs)
	else:
		oParser.print_help()

# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
	main()