# jsundram/parse_batlog.py

## parse_batlog.py
import datetime
import dateutil.parser
import itertools
import json
import os
import pytz
import re


# For use with data generated from:
# http://www.ifweassume.com/2013/08/the-de-evolution-of-my-laptop-battery.html


# Matches one `"Key" = value` data line, capturing the key (without quotes)
# and the raw value text.  The trailing newline is optional so that the last
# line of a file lacking a terminating newline still parses.
MATCHER = re.compile(r'.*"(.*)" = (.*)\n?')


def parse_row(line):
    """Parse one data line of the log into a (key, value) pair.

    Args:
        line: A single line of the form ``<prefix>"Key" = value``, with or
            without a trailing newline.

    Returns:
        A ``(key, value)`` tuple.  ``value`` is an ``int`` for every key
        except ``"LegacyBatteryInfo"``, whose value is decoded into a dict.

    Raises:
        ValueError: If the line does not have the expected shape, or if its
            value cannot be converted (``int`` and ``json.loads`` both raise
            ``ValueError`` or a subclass of it).
    """
    match = MATCHER.match(line)
    if match is None:
        # Fail with the offending line rather than an AttributeError on None.
        raise ValueError("unparseable data line: %r" % (line,))
    key, value = match.groups()
    if key == "LegacyBatteryInfo":
        # The value looks like {"Name"=1,...}; turning every '=' into ': '
        # makes it decodable as JSON.
        value = json.loads(value.replace('=', ': '))
    else:
        value = int(value)
    return key, value


def parse(filename):
    """Read a battery log file and return its samples.

    The file is consumed as alternating runs of lines: a run of lines that
    do not start with a space (the first of which is parsed as a timestamp)
    followed by a run of space-indented data lines (each parsed with
    ``parse_row``).

    Args:
        filename: Path of the log file to read.

    Returns:
        A list of ``(time, record)`` tuples in file order.  ``time`` is the
        datetime parsed from the most recent timestamp line (localized to
        US/Pacific when the timestamp carries no offset), or ``None`` if
        data lines appear before any timestamp.  ``record`` is a dict of the
        key/value pairs from the data lines.  A sample whose time is not
        strictly later than the previously kept sample is dropped and
        reported on stdout.
    """
    data = []
    default_tz = pytz.timezone('US/Pacific')  # YMMV
    with open(filename) as f:
        time, prev_time = None, None
        # groupby yields alternating runs: key is True for unindented
        # (timestamp) lines and False for space-indented (data) lines.
        for key, lines in itertools.groupby(f, lambda line: not line.startswith(' ')):
            if key:
                # Use the next() builtin: the .next() method exists only on
                # Python 2 iterators.  Only the first line of the run is used.
                timestamp = next(lines).strip()
                time = dateutil.parser.parse(timestamp)
                # Prevent TypeError: can't compare offset-naive and
                # offset-aware datetimes in the prev_time < time comparison
                # below by making our time aware of its offset.
                if not time.tzinfo:
                    time = default_tz.localize(time, is_dst=None)
            else:
                record = dict(map(parse_row, lines))

                # Deal with system clock issues . . .
                # My battery ran down all the way and the system clock went from
                # April 19, 2015 to Dec 31, 2013.
                #
                # Current approach is to throw these data points away.
                # It *might* be reasonable to just assume these occurred right
                # before the clock was corrected and recode them.
                if prev_time is None or prev_time < time:
                    data.append((time, record))
                    prev_time = time
                else:
                    print("discarding %s, %s, %s" % (time, prev_time, prev_time < time))
    return data


def main():
    """Parse ``~/batlog.dat`` and return the list produced by ``parse``."""
    log_path = os.path.expanduser('~/batlog.dat')
    # Alternatively, return a dataframe built from the parsed data:
    #
    #     import pandas
    #     df = pandas.DataFrame.from_items(data).transpose()
    #     df['CurrentCapacity']
    #
    # NOTE(review): DataFrame.from_items appears to be gone from pandas >= 1.0;
    # confirm against the installed version before using the snippet.
    return parse(log_path)


if __name__ == '__main__':
    # Runs only when the file is executed as a script; the parsed result is
    # bound to the module-level name `data`.
    data = main()