Skip to content

Instantly share code, notes, and snippets.

Created June 16, 2015 10:43
Show Gist options
  • Save framirez/a0bbc99f3b4e6af8c5b2 to your computer and use it in GitHub Desktop.
Save framirez/a0bbc99f3b4e6af8c5b2 to your computer and use it in GitHub Desktop.
The script will look for all the common strings between files
The script looks for all the strings that are common to every file in group A
and removes from that result every string found in the files of group B.
Every Zip file given is extracted and the strings are retrieved from the files
it contains | 2015
import os
import re
import sys
import magic
import shutil
import string
import zipfile
import argparse
import tempfile
def get_file_type(file_name):
    """Return the libmagic description of the content of *file_name*.

    The file is read in binary mode and its bytes are handed to a libmagic
    cookie, whose textual description (for example a string containing
    "Zip archive data") is returned.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The description string, or "" when libmagic reports nothing.
    """
    with open(file_name, "rb") as f:
        content = f.read()

    # NOTE(review): the right-hand sides of `content =` and `ms =` were lost
    # in the original text.  `ms.buffer(content)` matches the cookie API of
    # the libmagic Python binding (magic.open / load / buffer) -- confirm
    # against the installed `magic` module.
    ms = magic.open(magic.MAGIC_NONE)
    try:
        ms.load()
        file_type = ms.buffer(content)
    finally:
        ms.close()

    # Callers run a substring test on the result, so never hand back None.
    return file_type or ""
def is_zip_file(file_name):
    """Tell whether *file_name* is described as a Zip archive.

    The decision is a substring test for "Zip archive data" in the
    description returned by get_file_type().
    """
    return "Zip archive data" in get_file_type(file_name)
def strings(file_name, min_string_length):
    """Yield the runs of printable characters found in a binary file.

    A run is a maximal sequence of consecutive bytes that belong to
    ``string.printable[:-2]`` (every printable ASCII character except the
    vertical-tab and form-feed characters).  Only runs of at least
    *min_string_length* characters are produced, including a run that
    reaches the end of the file.

    Args:
        file_name: Path of the file to scan.
        min_string_length: Minimum number of characters a run must have.

    Yields:
        Each qualifying run as a native ``str``, in file order.
    """
    # Byte values are compared as integers so that the scan behaves the same
    # on Python 2 and Python 3; a frozenset keeps each test O(1).
    printable = frozenset(bytearray(string.printable[:-2].encode("ascii")))

    with open(file_name, "rb") as f:
        data = bytearray(f.read())

    run = []
    for byte in data:
        if byte in printable:
            run.append(chr(byte))
            continue
        if len(run) >= min_string_length:
            yield "".join(run)
        run = []

    # A run that is still open at end-of-file is a valid string too.
    if len(run) >= min_string_length:
        yield "".join(run)
def get_zip_files_strings(zip_file, files_include, min_string_length):
    """Extract a Zip archive and collect the strings of its members.

    The archive is unpacked into a fresh temporary directory, every selected
    member is scanned with strings(), and the temporary directory is removed
    before returning.

    Args:
        zip_file: Path of the Zip archive.
        files_include: ``None`` to scan every member, otherwise a container
            tested with ``name in files_include`` (for a plain string this
            is a substring test).
        min_string_length: Minimum length of the strings to keep.

    Returns:
        A list with the strings of all scanned members, in archive order.
    """
    print("%s %s" % ('\x1b[%sm%s\x1b[0m' % (';'.join(['32']), zip_file),
                     " UnZipping file and get the strings"))

    found_strings = []
    out_path = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(zip_file) as z:
            for name in z.namelist():
                # extract() creates directory entries itself (tolerating
                # ones that already exist) and returns the real path it
                # wrote, which may differ from a naive join when the member
                # name had to be sanitised.
                extracted = z.extract(name, out_path)
                if name.endswith('/'):
                    continue
                if files_include is None or name in files_include:
                    # += consumes the generator now, while the file exists.
                    found_strings += strings(extracted, min_string_length)
    finally:
        # Never leave the unpacked archive behind, even on failure.
        shutil.rmtree(out_path, ignore_errors=True)
    return found_strings
def process_group(file_list, files_include, operation_type, min_string_length):
    """Combine the string sets of every file in a group.

    Each file contributes one set of strings: Zip archives are unpacked with
    get_zip_files_strings(), any other file is scanned directly with
    strings().  The per-file sets are then merged.

    Args:
        file_list: Paths of the files that form the group.
        files_include: Member filter forwarded to get_zip_files_strings().
        operation_type: "intersection" to keep only the strings present in
            every file, "union" to keep the strings present in any file.
        min_string_length: Minimum length of the strings to keep.

    Returns:
        A set of strings; empty when *file_list* is empty.

    Raises:
        ValueError: If *operation_type* is not a supported operation.
    """
    if operation_type not in ("intersection", "union"):
        raise ValueError("unknown operation type: %r" % (operation_type,))

    group_strings = []
    for file_name in file_list:
        if is_zip_file(file_name):
            file_strings = get_zip_files_strings(file_name,
                                                 files_include,
                                                 min_string_length)
        else:
            print("%s %s" % ('\x1b[%sm%s\x1b[0m' % (';'.join(['32']), file_name),
                             " Get the strings"))
            file_strings = strings(file_name, min_string_length)
        group_strings.append(set(file_strings))

    # Nothing to merge: both operations degrade to the empty set.
    if not group_strings:
        return set()

    if operation_type == "intersection":
        return set.intersection(*group_strings)
    return set.union(*group_strings)
def pretty_print(group):
    """Print every non-blank string of *group* on its own line.

    Newline, tab and carriage-return characters are removed from each string
    first; strings that are empty or whitespace-only after that are skipped.

    Args:
        group: Iterable of strings to print.
    """
    for similar in group:
        similar = similar.replace("\n", "").replace("\t", "").replace("\r", "")
        if not re.match(r"^\s*$", similar):
            print(similar)
if __name__ == "__main__":
    # Command-line entry point: compute the strings common to every file of
    # group A, the strings present in any file of group B, and report the
    # strings of A that do not appear in B.
    #
    # NOTE(review): help_description is not defined in the visible part of
    # this file; it has to exist at module level for the parser to be built.
    parser = argparse.ArgumentParser(description=help_description)
    parser.add_argument('-f', '--files', dest="files",
        help='Files to get the strings, for default all of them', default=None)
    parser.add_argument('-m', '--min-string', dest="min_string_length", type=int,
        help='Minimum length for each strings', default=8)
    parser.add_argument('-a', action='append', dest='a_group', default=[],
        help='Add files to compare A group')
    parser.add_argument('-b', action='append', dest='b_group', default=[],
        help='Add files to compare B group')
    results = parser.parse_args()

    # None means "group not given", which is different from an empty result.
    a_group = None
    b_group = None
    if results.a_group:
        a_group = process_group(results.a_group, results.files,
                                "intersection", results.min_string_length)
    if results.b_group:
        b_group = process_group(results.b_group, results.files,
                                "union", results.min_string_length)

    # The difference only makes sense when both groups were supplied.
    difference = None
    if a_group is not None and b_group is not None:
        difference = a_group - b_group

    # NOTE(review): the original text never printed the strings themselves;
    # showing the most specific result available is the assumed intent.
    if difference is not None:
        pretty_print(difference)
    elif a_group is not None:
        pretty_print(a_group)

    if a_group is not None:
        print("%s %s" % ('\x1b[%sm%s\x1b[0m' % (';'.join(['1']), str(len(a_group))),
                         "\tcommon string found between all files in A group"))
    if b_group is not None:
        print("%s %s" % ('\x1b[%sm%s\x1b[0m' % (';'.join(['1']), str(len(b_group))),
                         "\tdifferent strings found in B group"))
    if difference is not None:
        print("%s %s" % ('\x1b[%sm%s\x1b[0m' % (';'.join(['1']), str(len(difference))),
                         "\tstring from A group not present in B group"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment