# danzek/reformat_encase_internet.py

## reformat_encase_internet.py
#!/usr/bin/python -tt
# -*- coding: utf-8 -*-
"""
Given the starting line number from the html output (and optional ending line number), parses Internet history
artifacts from a default EnCase 7 HTML report into a nicer looking table. Be sure to customize fields and field widths
as needed.

Copyright 2015, Dan O'Day (d@4n68r.com)

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

import codecs
import re
import sys


# Module metadata: author, license and development status
__author__ = "Dan O'Day"
__license__ = "GNU General Public License"
__status__ = "Development"


def write_string_to_file(s):
    """
    Append a string to the final report file (report.html, UTF-8 encoded, in the current working directory)

    :param s: string to append to the report
    """
    report = codecs.open('report.html', mode='a+', encoding='utf-8')
    with report:
        report.write(s)


def write_record(record, columns, widths):
    """
    Given Internet artifact record, column header names, and column widths, appends the record to report.html as
    one HTML table row

    A column whose value is missing, None or empty is rendered as a non-breaking space.

    :param record: dictionary containing Internet artifact row data, keyed by column name
    :param columns: list containing names of header titles for each column, must match dictionary key names of record
    :param widths: list containing cell widths for each column, in the same order as columns
    """
    field_close = '</div></td>'

    cells = []
    # pair each column with its width by position: columns.index() would return the first match for a repeated
    # header name and rescans the list for every cell
    for position, column in enumerate(columns):
        width = widths[position]
        field_open = '<td class="tab" style="width:{}"><div class="IC" style="width:{}">'.format(width, width)
        # plain concatenation (not str.format) so unicode values are not forced through ASCII on Python 2
        cells.append(field_open + (record.get(column) or '&nbsp;') + field_close)

    row = '<tr valign="top">' + ''.join(cells) + '</tr>'

    # the row is fully built before the file is opened, so a failure above never leaves a partial row behind
    with codecs.open('report.html', encoding='utf-8', mode='a+') as of:
        of.write(row)


def get_line_count(file_name):
    """
    Get line count of file

    :param file_name: name of file to count lines from
    :return: number of lines in the file (0 for an empty file)
    """
    count = 0  # stays 0 when the file is empty and the loop body never runs
    # binary mode: only newlines are counted, so the content never has to be decoded
    with open(file_name, 'rb') as f:
        # start at 1 so the last value seen is the number of lines, not the last 0-based index
        for count, _ in enumerate(f, 1):
            pass
    return count


def _first_group(pattern, line):
    """
    Return the first capture group of pattern in line, or None when the pattern does not match

    :param pattern: compiled regular expression with at least one capture group
    :param line: text to search
    """
    match = pattern.search(line)
    return match.group(1) if match else None


def process_artifacts(file_name, start_line, end_line=None):
    """
    Process Internet artifacts from EnCase 7 html report

    Lines before start_line are copied through as the report header (with an extra ".tab" CSS rule added after the
    opening style tag), lines from start_line through end_line are parsed into table rows, and lines after end_line
    are copied through as the footer. Output is appended to the report file by write_string_to_file/write_record.

    :param file_name: name of file containing Internet artifacts from EnCase 7 html report
    :param start_line: starting line number (1-based) of artifacts, should not include title/heading
    :param end_line: optional ending line number (1-based, inclusive) of artifacts, must be supplied if additional
                     content is presented in the html report after the Internet artifacts
    """
    columns = ['#', 'Source', 'Item Path', 'Type', 'Visit Count', 'URL Name', 'Record Last Accessed',
               'Internet Artifact Type', 'Title', 'Browser Type', 'Profile']

    widths = ['38', '75', '100', '25', '40', '200', '75', '100', '150', '100', '60']

    table_header = '<div style="font-size:9pt;color:#000000;padding-left:15pt;"><table border="1" cellspacing="0" ' \
                   'cellpadding="0">'
    table_footer = '</table></div>'

    # generate table header row
    title_open = """<td class="tab" align="center" style="background-color:#bebebe;color:#000000;"""
    title_close = """</div></td>"""
    table_title_row = ''.join(
        title_open + 'width:{}"><div class="IC" style="width:{}">'.format(width, width) + name + title_close
        for name, width in zip(columns, widths))

    # custom css class for report, added right after the opening style tag of the original header
    custom_css = '.tab {font-family:"Calibri";font-size:10pt;margin:0px;padding:2;word-wrap:break-word;' \
                 'overflow:hidden;border-color:black;border-style:solid}'

    # regex patterns for data types
    new_record_pattern = re.compile(r'>(\d+)\)')
    source_pattern = re.compile(r'&nbsp;(.+)<br/>')
    # (marker that identifies the line, record key, pattern extracting the value); order matters, first marker wins
    field_patterns = [
        ('>Item Path', 'Item Path', re.compile(r'>Item Path\t(.*)<br/>')),
        ('>Type</div>', 'Type', re.compile(r'>Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Visit Count<', 'Visit Count', re.compile(r'>Visit Count<.*class="IC".*>(.*)</div></td>')),
        ('>Url Name<', 'URL Name', re.compile(r'>Url Name</div>.*class="IC".*>(.*)</div></td>')),
        ('>Record Last Accessed<', 'Record Last Accessed',
         re.compile(r'>Record Last Accessed</div>.*class="IC".*>(.*)</div></td>')),
        ('>Internet Artifact Type<', 'Internet Artifact Type',
         re.compile(r'>Internet Artifact Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Title<', 'Title', re.compile(r'>Title</div>.*class="IC".*>(.*)</div></td>')),
        ('>Browser Type<', 'Browser Type', re.compile(r'>Browser Type</div>.*class="IC".*>(.*)</div></td>')),
        ('>Profile Name<', 'Profile', re.compile(r'>Profile Name</div>.*class="IC".*>(.*)</div></td>')),
    ]

    # header and footer placeholders, line total used by the progress bar
    header_parts = []
    if end_line:
        footer_parts = []
        total_lines = end_line
    else:
        # the closing tags of the source report fall inside the artifact range, so they are re-added here
        footer_parts = ['</body></html>']
        total_lines = get_line_count(file_name)
    total_lines = max(total_lines, 1)  # never divide by zero in the progress calculation

    current_record = dict.fromkeys(columns)
    record_count = 0
    table_started = False
    last_percent = None

    with codecs.open(file_name, 'r', "utf-8") as f:
        # iterate through each line
        for index, line in enumerate(f):
            line_number = index + 1

            # progress bar, capped at 100% and redrawn only when the percentage changes
            percent = min(100, (100 * line_number) // total_lines)
            if percent != last_percent:
                filled = percent // 2
                sys.stdout.write('\r[' + ('|' * filled) + ('.' * (50 - filled)) + '] {}%'.format(percent))
                sys.stdout.flush()
                last_percent = percent

            if line_number < start_line:
                header_parts.append(line)
                if '<style type="text/css">' in line:
                    header_parts.append(custom_css)
                continue

            # end_line is a 1-based line number like start_line, so compare it with the 1-based position;
            # comparing with the 0-based index silently dropped the first line after the artifacts
            if end_line and line_number > end_line:
                footer_parts.append(line)
                continue

            if not table_started:
                # write initial report data
                write_string_to_file(''.join(header_parts) + table_header + table_title_row)
                table_started = True

            if new_record_pattern.search(line):  # test for new record
                # write old record to file
                if record_count > 0:
                    write_record(current_record, columns, widths)

                # begin new record
                record_count += 1
                current_record = dict.fromkeys(columns)
                current_record['#'] = str(record_count)
                current_record['Source'] = _first_group(source_pattern, line)
                continue

            for marker, column, pattern in field_patterns:
                if marker in line:
                    # a line that carries the marker but not the expected markup leaves the field untouched
                    value = _first_group(pattern, line)
                    if value is not None:
                        current_record[column] = value
                    break

    if last_percent is not None:
        sys.stdout.write('\n')  # finish the progress bar line

    if not table_started:
        # start_line was never reached: still emit the header so the table footer below has a matching opening
        write_string_to_file(''.join(header_parts) + table_header + table_title_row)
    if record_count > 0:
        # flush the last record; skipped when nothing was parsed so no blank row is emitted
        write_record(current_record, columns, widths)
    write_string_to_file(table_footer)
    write_string_to_file(''.join(footer_parts))


def main():
    """
    Command-line entry point

    Expects a report file name and a starting line number in sys.argv, optionally followed by an ending line
    number. Prints a message and exits with status 1 when arguments are missing or a line number is not an integer.

    :return:
    """
    args = sys.argv[1:]

    # check for CLI parameters (by count: indexing a missing argument would raise IndexError instead)
    if len(args) < 2 or not args[0]:
        print('you need to specify a report file and starting line (and optional ending line)')
        sys.exit(1)

    # validate starting line number and, if present, optional ending line number
    try:
        start_line = int(args[1])
        end_line = int(args[2]) if len(args) > 2 and args[2] else None
    except ValueError:
        print('not a valid number')
        sys.exit(1)

    # called outside the try blocks so errors raised while processing are neither misreported as a bad number
    # nor trigger a second processing pass
    process_artifacts(args[0], start_line, end_line)


# Run the command-line entry point only when executed as a script, not when imported
if __name__ == '__main__':
    main()
	#!/usr/bin/python -tt
	# -*- coding: utf-8 -*-
	"""
	Given the starting line number from the html output (and optional ending line number), parses Internet history
	artifacts from a default EnCase 7 HTML report into a nicer looking table. Be sure to customize fields and field widths
	as needed.

	Copyright 2015, Dan O'Day (d@4n68r.com)

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
	WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
	COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
	OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	"""

	import codecs
	import re
	import sys


	__author__ = "Dan O'Day"
	__license__ = "GNU General Public License"
	__status__ = "Development"


	def write_string_to_file(s):
	    """
	    Given string, appends it to the final report file (report.html, UTF-8 encoded)

	    :param s: string to write to file
	    """
	    # body re-indented: the flattened original was not a valid function definition
	    with codecs.open('report.html', encoding='utf-8', mode='a+') as of:
	        of.write(s)


	def write_record(record, columns, widths):
	    """
	    Given Internet artifact record, column header names, and column widths, appends the record to report.html as
	    one HTML table row

	    A column whose value is missing, None or empty is rendered as a non-breaking space.

	    :param record: dictionary containing Internet artifact row data, keyed by column name
	    :param columns: list containing names of header titles for each column, must match dictionary key names of record
	    :param widths: list containing cell widths for each column, in the same order as columns
	    """
	    field_close = '</div></td>'

	    cells = []
	    # pair each column with its width by position: columns.index() would return the first match for a repeated
	    # header name and rescans the list for every cell
	    for position, column in enumerate(columns):
	        width = widths[position]
	        field_open = '<td class="tab" style="width:{}"><div class="IC" style="width:{}">'.format(width, width)
	        # plain concatenation (not str.format) so unicode values are not forced through ASCII on Python 2;
	        # empty cells use the &nbsp; entity rather than a plain space
	        cells.append(field_open + (record.get(column) or '&nbsp;') + field_close)

	    row = '<tr valign="top">' + ''.join(cells) + '</tr>'

	    # the row is fully built before the file is opened, so a failure above never leaves a partial row behind
	    with codecs.open('report.html', encoding='utf-8', mode='a+') as of:
	        of.write(row)


	def get_line_count(file_name):
	    """
	    Get line count of file

	    :param file_name: name of file to count lines from
	    :return: number of lines in the file (0 for an empty file)
	    """
	    count = 0  # stays 0 when the file is empty and the loop body never runs
	    # binary mode: only newlines are counted, so the content never has to be decoded
	    with open(file_name, 'rb') as f:
	        # start at 1 so the last value seen is the number of lines, not the last 0-based index
	        for count, _ in enumerate(f, 1):
	            pass
	    return count


	def _first_group(pattern, line):
	    """
	    Return the first capture group of pattern in line, or None when the pattern does not match

	    :param pattern: compiled regular expression with at least one capture group
	    :param line: text to search
	    """
	    match = pattern.search(line)
	    return match.group(1) if match else None


	def process_artifacts(file_name, start_line, end_line=None):
	    """
	    Process Internet artifacts from EnCase 7 html report

	    Lines before start_line are copied through as the report header (with an extra ".tab" CSS rule added after the
	    opening style tag), lines from start_line through end_line are parsed into table rows, and lines after end_line
	    are copied through as the footer. Output is appended to the report file by write_string_to_file/write_record.

	    :param file_name: name of file containing Internet artifacts from EnCase 7 html report
	    :param start_line: starting line number (1-based) of artifacts, should not include title/heading
	    :param end_line: optional ending line number (1-based, inclusive) of artifacts, must be supplied if additional
	                     content is presented in the html report after the Internet artifacts
	    """
	    columns = ['#', 'Source', 'Item Path', 'Type', 'Visit Count', 'URL Name', 'Record Last Accessed',
	               'Internet Artifact Type', 'Title', 'Browser Type', 'Profile']

	    widths = ['38', '75', '100', '25', '40', '200', '75', '100', '150', '100', '60']

	    table_header = '<div style="font-size:9pt;color:#000000;padding-left:15pt;"><table border="1" cellspacing="0" ' \
	                   'cellpadding="0">'
	    table_footer = '</table></div>'

	    # generate table header row
	    title_open = """<td class="tab" align="center" style="background-color:#bebebe;color:#000000;"""
	    title_close = """</div></td>"""
	    table_title_row = ''.join(
	        title_open + 'width:{}"><div class="IC" style="width:{}">'.format(width, width) + name + title_close
	        for name, width in zip(columns, widths))

	    # custom css class for report, added right after the opening style tag of the original header
	    custom_css = '.tab {font-family:"Calibri";font-size:10pt;margin:0px;padding:2;word-wrap:break-word;' \
	                 'overflow:hidden;border-color:black;border-style:solid}'

	    # regex patterns for data types (the ".*" wildcards and the "&nbsp;" entity are required by the markup)
	    new_record_pattern = re.compile(r'>(\d+)\)')
	    source_pattern = re.compile(r'&nbsp;(.+)<br/>')
	    # (marker that identifies the line, record key, pattern extracting the value); order matters, first marker wins
	    field_patterns = [
	        ('>Item Path', 'Item Path', re.compile(r'>Item Path\t(.*)<br/>')),
	        ('>Type</div>', 'Type', re.compile(r'>Type</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Visit Count<', 'Visit Count', re.compile(r'>Visit Count<.*class="IC".*>(.*)</div></td>')),
	        ('>Url Name<', 'URL Name', re.compile(r'>Url Name</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Record Last Accessed<', 'Record Last Accessed',
	         re.compile(r'>Record Last Accessed</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Internet Artifact Type<', 'Internet Artifact Type',
	         re.compile(r'>Internet Artifact Type</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Title<', 'Title', re.compile(r'>Title</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Browser Type<', 'Browser Type', re.compile(r'>Browser Type</div>.*class="IC".*>(.*)</div></td>')),
	        ('>Profile Name<', 'Profile', re.compile(r'>Profile Name</div>.*class="IC".*>(.*)</div></td>')),
	    ]

	    # header and footer placeholders, line total used by the progress bar
	    header_parts = []
	    if end_line:
	        footer_parts = []
	        total_lines = end_line
	    else:
	        # the closing tags of the source report fall inside the artifact range, so they are re-added here
	        footer_parts = ['</body></html>']
	        total_lines = get_line_count(file_name)
	    total_lines = max(total_lines, 1)  # never divide by zero in the progress calculation

	    current_record = dict.fromkeys(columns)
	    record_count = 0
	    table_started = False
	    last_percent = None

	    with codecs.open(file_name, 'r', "utf-8") as f:
	        # iterate through each line
	        for index, line in enumerate(f):
	            line_number = index + 1

	            # progress bar, capped at 100% and redrawn only when the percentage changes
	            percent = min(100, (100 * line_number) // total_lines)
	            if percent != last_percent:
	                filled = percent // 2
	                sys.stdout.write('\r[' + ('|' * filled) + ('.' * (50 - filled)) + '] {}%'.format(percent))
	                sys.stdout.flush()
	                last_percent = percent

	            if line_number < start_line:
	                header_parts.append(line)
	                if '<style type="text/css">' in line:
	                    header_parts.append(custom_css)
	                continue

	            # end_line is a 1-based line number like start_line, so compare it with the 1-based position;
	            # comparing with the 0-based index silently dropped the first line after the artifacts
	            if end_line and line_number > end_line:
	                footer_parts.append(line)
	                continue

	            if not table_started:
	                # write initial report data
	                write_string_to_file(''.join(header_parts) + table_header + table_title_row)
	                table_started = True

	            if new_record_pattern.search(line):  # test for new record
	                # write old record to file
	                if record_count > 0:
	                    write_record(current_record, columns, widths)

	                # begin new record
	                record_count += 1
	                current_record = dict.fromkeys(columns)
	                current_record['#'] = str(record_count)
	                current_record['Source'] = _first_group(source_pattern, line)
	                continue

	            for marker, column, pattern in field_patterns:
	                if marker in line:
	                    # a line that carries the marker but not the expected markup leaves the field untouched
	                    value = _first_group(pattern, line)
	                    if value is not None:
	                        current_record[column] = value
	                    break

	    if last_percent is not None:
	        sys.stdout.write('\n')  # finish the progress bar line

	    if not table_started:
	        # start_line was never reached: still emit the header so the table footer below has a matching opening
	        write_string_to_file(''.join(header_parts) + table_header + table_title_row)
	    if record_count > 0:
	        # flush the last record; skipped when nothing was parsed so no blank row is emitted
	        write_record(current_record, columns, widths)
	    write_string_to_file(table_footer)
	    write_string_to_file(''.join(footer_parts))


	def main():
	    """
	    Command-line entry point

	    Expects a report file name and a starting line number in sys.argv, optionally followed by an ending line
	    number. Prints a message and exits with status 1 when arguments are missing or a line number is not an integer.

	    :return:
	    """
	    args = sys.argv[1:]

	    # check for CLI parameters (by count: indexing a missing argument would raise IndexError instead)
	    if len(args) < 2 or not args[0]:
	        print('you need to specify a report file and starting line (and optional ending line)')
	        sys.exit(1)

	    # validate starting line number and, if present, optional ending line number
	    try:
	        start_line = int(args[1])
	        end_line = int(args[2]) if len(args) > 2 and args[2] else None
	    except ValueError:
	        print('not a valid number')
	        sys.exit(1)

	    # called outside the try blocks so errors raised while processing are neither misreported as a bad number
	    # nor trigger a second processing pass
	    process_artifacts(args[0], start_line, end_line)


	# Run the command-line entry point only when executed as a script (guarded call re-indented under the if)
	if __name__ == '__main__':
	    main()