Last active
May 9, 2016 23:48
-
-
Save chris-piekarski/6e4fd659a8bfd0e57e05 to your computer and use it in GitHub Desktop.
Parse an Android logcat file into a .csv file with one column per tag listed in TAGS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: @c_piekarski | |
import optparse | |
import csv | |
import logging | |
import os | |
import datetime | |
import re | |
# Logcat tags to extract. Each tag gets its own CSV column, in this order.
TAGS = [
    "Browser",
    "Dalvikvm",
    "Zygote",
    "AndroidRuntime",
]

# Optional substring filters, index-aligned with TAGS. An empty list accepts
# every line carrying the tag; otherwise the line must also contain at least
# one of the listed substrings (see line_has_tag).
SUBTAGS = [
    [],  # Browser
    [],  # Dalvikvm
    [],  # Zygote
    [],  # AndroidRuntime
]

# CSV header row. The script entry point inserts "Delta" at index 1 when the
# -d option is given.
COLS = ["Time"] + TAGS

# Parsed timestamp of the reference row used for delta computation;
# stays -1 until parse_file sees its first tagged line.
BASE_TIME = -1
def write_to_csv(row, file_name):
    """Append a single row to the CSV file at *file_name*.

    The file is created if it does not exist yet and is reopened on every
    call, so rows written by earlier calls are preserved.

    Args:
        row: Sequence of cell values forming one CSV record.
        file_name: Path of the CSV file to append to.
    """
    import sys

    # The csv module expects a binary file on Python 2 but a text file opened
    # with newline="" on Python 3. Always using "ab" makes csv.writer raise
    # TypeError on Python 3, so pick the mode that matches the interpreter.
    if sys.version_info[0] >= 3:
        f = open(file_name, "a", newline="")
    else:
        f = open(file_name, "ab")
    with f:
        csv.writer(f, delimiter=',').writerow(row)
def clear_output_file(filename):
    """Delete *filename* if it exists so the next run starts from an empty file.

    Args:
        filename: Path of the output file to remove.
    """
    if not os.path.exists(filename):
        logging.debug("output file doesn't exist")
        return

    logging.debug("removing existing output file %s" % filename)
    os.remove(filename)
# TODO: use a regex instead of plain substring search
def line_has_tag(line):
    """Return the index in TAGS of the first tag that *line* matches, or -1.

    A tag matches when the text ``TAG(`` occurs anywhere in the line and
    either the tag has no SUBTAGS configured or the line also contains at
    least one of its subtags. If a tag is present but none of its subtags
    are, the search continues with the remaining tags.

    Args:
        line: One raw line of logcat output.

    Returns:
        The matching index into TAGS, or -1 when no tag matches.
    """
    for index, tag in enumerate(TAGS):
        if (tag + "(") not in line:
            continue
        logging.debug("found a matching tag: {}".format(tag))
        wanted = SUBTAGS[index]
        if not wanted or any(sub in line for sub in wanted):
            return index
    return -1
def get_time_stamp(line):
    """Return the second single-space-separated field of *line*.

    Presumably the time-of-day of a ``logcat -v time`` line
    ("MM-DD HH:MM:SS.mmm ..."); verify against the actual log format.

    Args:
        line: One raw line of logcat output.

    Raises:
        IndexError: If the line contains no space character.
    """
    # Fields past the second are irrelevant, so stop splitting after two cuts.
    return line.split(" ", 2)[1]
def get_message(line):
    """Return everything in *line* after the first ``": "`` separator.

    Later occurrences of ``": "`` are left in the returned text, as is any
    trailing newline.

    Args:
        line: One raw line of logcat output.

    Raises:
        IndexError: If the line contains no ``": "`` separator.
    """
    pieces = line.split(": ", 1)
    return pieces[1]
def parse_time(time):
    """Convert an ``HH:MM:SS.fraction`` string into a ``datetime.datetime``.

    Only the time of day is parsed, so the date part of the result is
    strptime's default (1900-01-01).

    Args:
        time: Time-of-day string such as ``"23:48:00.123"``.

    Raises:
        ValueError: If *time* does not match the expected format.
    """
    time_format = "%H:%M:%S.%f"
    return datetime.datetime.strptime(time, time_format)
def parse_file(input_file, output_file):
    """Convert the tagged lines of a logcat file into rows of a CSV file.

    The header row (COLS) is printed to stdout and written first. Every input
    line matching one of TAGS then becomes one row: the timestamp goes in the
    first column and the message goes in the column of the matching tag. When
    the second column of COLS is "Delta", that column receives the number of
    seconds elapsed since the previous tagged line.

    The module-level BASE_TIME is updated as a side effect and is not reset,
    so it carries over between calls.

    Args:
        input_file: Path of the logcat text file to read.
        output_file: Path of the CSV file to append rows to.

    Raises:
        IndexError: If a tagged line lacks a timestamp field or a ``": "``
            message separator.
        ValueError: If a timestamp that must be parsed is malformed.
    """
    global BASE_TIME

    # The column layout is fixed for the whole run, so work it out once.
    has_delta = len(COLS) > 1 and COLS[1] == "Delta"
    first_tag_col = 2 if has_delta else 1
    tagged = 0

    # Function-call form is valid on Python 2 and 3 alike.
    print(COLS)
    write_to_csv(COLS, output_file)
    logging.debug("input file is: {}".format(input_file))

    with open(input_file, 'r') as f:
        for line in f:
            tag_index = line_has_tag(line)
            if tag_index < 0:
                continue
            tagged += 1

            stamp = get_time_stamp(line)
            if BASE_TIME == -1:
                # The first tagged line provides the initial reference time.
                BASE_TIME = parse_time(stamp)

            # Drop the line terminator so it is not embedded in the CSV cell.
            msg = get_message(line).rstrip("\r\n")

            row = [""] * len(COLS)
            row[0] = stamp
            if has_delta:
                # Parse once, then make this row the reference for the next.
                now = parse_time(stamp)
                row[1] = (now - BASE_TIME).total_seconds()
                BASE_TIME = now
            row[first_tag_col + tag_index] = msg
            write_to_csv(row, output_file)

    logging.info("total tagged input lines: {}".format(tagged))
if __name__ == "__main__":
    # Command-line entry point: parse the options, configure logging, then
    # rebuild the output CSV from the given logcat file.
    parser = optparse.OptionParser()
    parser.add_option('-v', action="store_true", dest="verbose", default=False,
                      help="turn on verbose logging")
    parser.add_option('-o', action="store", dest="out_file", default="output.csv",
                      help="CSV output file")
    parser.add_option('-i', action="store", dest="log_file",
                      help="Log file to parse")
    parser.add_option('-d', action="store_true", dest="delta",
                      help="Add time delta colomn to CSV file")
    options, remainder = parser.parse_args()

    # Making sure all mandatory options appeared.
    mandatories = ['log_file']
    for m in mandatories:
        if not getattr(options, m):
            # Function-call form is valid on Python 2 and 3 alike.
            print("mandatory option is missing\n")
            parser.print_help()
            # exit() is a helper injected by the site module; raising
            # SystemExit has the same effect without depending on it.
            raise SystemExit(-1)

    log_level = logging.INFO
    if options.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level)

    if options.delta:
        # The delta column sits between the timestamp and the tag columns.
        COLS.insert(1, "Delta")

    # Start from an empty output file, since rows are written in append mode.
    clear_output_file(options.out_file)
    parse_file(options.log_file, options.out_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment