zaim/stats.py

## stats.py
#!/usr/bin/python

"Parses a playlist and prints some stats about songs played"

import datetime
import optfunc   # https://github.com/simonw/optfunc/tree
import os
import re
import sys
from operator import itemgetter

def sortdval(d, reverse=False):
    """Sort dict by value.

    Returns the (key, value) pairs of ``d`` as a list ordered by value:
    ascending by default, descending when ``reverse`` is true.
    """
    # items() is available on both Python 2 and Python 3, whereas the
    # iteritems() used previously only exists on Python 2.
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)

def count(filename):
    "Usage: %prog count songs_file\n\nCount songs played since started log"
    # The string above is left untouched on purpose: it is presumably what
    # optfunc displays as the command's usage text (confirm against optfunc).
    #
    # Reads a log with one song per line, tab-separated, whose first field is
    # a timestamp in seconds.  Prints the time of the first and last entry,
    # the number of entries, the whole hours between them and the average
    # number of songs per hour.  Prints a message and returns early when the
    # file is missing or holds no entries.  Always returns None.

    if not os.path.isfile(filename):
        print("file not found: " + filename)
        return

    started = None
    ended = None
    total = 0

    # The context manager closes the log even when reading fails half-way.
    with open(filename, 'r') as log:
        for line in log:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            # Only the leading timestamp is needed for these stats.
            ts = line.split("\t", 1)[0]
            if started is None:
                started = ts
            ended = ts
            total += 1

    if total == 0:
        print("no songs found in: " + filename)
        return

    first = float(started)
    last = float(ended)

    # Whole hours from the raw timestamps.  timedelta.seconds only holds the
    # sub-day remainder, so it under-reports logs spanning more than a day.
    hours = int((last - first) // 3600)

    print("Log started at %s" % datetime.datetime.fromtimestamp(first).strftime("%x %X"))
    print("Last entry  at %s" % datetime.datetime.fromtimestamp(last).strftime("%x %X"))
    print("")
    print("%d songs played in %d hours" % (total, hours))
    # Clamp the divisor to one hour so that a log shorter than an hour does
    # not raise ZeroDivisionError.
    print("average %d songs per hour" % (total // max(hours, 1)))


def pattern(filename, period=3600):
    "Usage: %prog pattern songs_file\n\nLists songs played more than once per given period"
    # The string above is left untouched on purpose: it is presumably what
    # optfunc displays as the command's usage text (confirm against optfunc).
    #
    # Reads a log with one song per line made of four tab-separated fields
    # (timestamp in seconds, artist, title, album).  Songs are grouped into
    # windows: a new window opens at the first entry that is more than
    # ``period`` seconds after the start of the current window.  Prints the
    # play count of every song per window, then the songs played more than
    # once over the whole log.  ``period`` is coerced with int().  A log
    # without entries prints no window at all.  Always returns None.

    period = int(period)

    if not os.path.isfile(filename):
        print("file not found: " + filename)
        return

    whitespace = re.compile(r'\s+')
    windows = []   # (start timestamp, {song name: plays}) per window
    overall = {}   # song name -> plays over the whole log

    started = 0
    ended = 0
    total = 0

    # The context manager closes the log even when a line fails to parse.
    with open(filename, 'r') as log:
        for line in log:
            ts, artist, title, album = line.split("\t")
            ts = float(ts)

            if not windows:
                started = ts
                windows.append((ts, {}))
            elif (ts - windows[-1][0]) > period:
                windows.append((ts, {}))

            # normalize artist/title
            name = whitespace.sub(" ", "%s %s" % (artist, title)).lower()

            # per period
            plays = windows[-1][1]
            plays[name] = plays.get(name, 0) + 1

            # per overall
            overall[name] = overall.get(name, 0) + 1

            ended = ts
            total += 1

    # Float division: integer division made "%.2f" show 0.00 for any period
    # shorter than an hour and skewed the pluralisation.
    period_hours = period / 3600.0
    plural = 's' if period_hours > 1 else ''
    print("Number of time song played every %.2f hour%s" % (period_hours, plural))
    print("")

    for window_start, plays in windows:
        # Label each window with the timestamp it really opened at, so the
        # printed range matches the songs listed under it.
        set_start = datetime.datetime.fromtimestamp(window_start).strftime("%X")
        set_end = datetime.datetime.fromtimestamp(window_start + period).strftime("%X")
        print("%s - %s" % (set_start, set_end))
        for n, c in sortdval(plays, True):
            print("   %s: %d" % (n, c))
        print("")

    print("Total songs played: %d in %d hours" % (total, (ended - started) / 60 / 60))
    print("")

    repeated = [(n, c) for (n, c) in sortdval(overall, True) if c > 1]
    print("Songs played more than once overall (%d):" % len(repeated))
    for (n, c) in repeated:
        print("   %s: %d" % (n, c))


# Script entry point: hand both command functions to optfunc (presumably it
# selects one from the command line -- confirm against optfunc).
if __name__ == "__main__":
    commands = [count, pattern]
    optfunc.run(commands)
	#!/usr/bin/python

	"Parses a playlist and prints some stats about songs played"

	import datetime
	import optfunc # https://github.com/simonw/optfunc/tree
	import os
	import re
	import sys
	from operator import itemgetter

	def sortdval(d, reverse=False):
	    """Sort dict by value.

	    Returns the (key, value) pairs of ``d`` as a list ordered by value:
	    ascending by default, descending when ``reverse`` is true.
	    """
	    # items() is available on both Python 2 and Python 3, whereas the
	    # iteritems() used previously only exists on Python 2.
	    return sorted(d.items(), key=itemgetter(1), reverse=reverse)

	def count(filename):
	    "Usage: %prog count songs_file\n\nCount songs played since started log"
	    # The string above is left untouched on purpose: it is presumably what
	    # optfunc displays as the command's usage text (confirm against optfunc).
	    #
	    # Reads a log with one song per line, tab-separated, whose first field is
	    # a timestamp in seconds.  Prints the time of the first and last entry,
	    # the number of entries, the whole hours between them and the average
	    # number of songs per hour.  Prints a message and returns early when the
	    # file is missing or holds no entries.  Always returns None.

	    if not os.path.isfile(filename):
	        print("file not found: " + filename)
	        return

	    started = None
	    ended = None
	    total = 0

	    # The context manager closes the log even when reading fails half-way.
	    with open(filename, 'r') as log:
	        for line in log:
	            if not line.strip():
	                # Tolerate blank lines (e.g. a trailing newline).
	                continue
	            # Only the leading timestamp is needed for these stats.
	            ts = line.split("\t", 1)[0]
	            if started is None:
	                started = ts
	            ended = ts
	            total += 1

	    if total == 0:
	        print("no songs found in: " + filename)
	        return

	    first = float(started)
	    last = float(ended)

	    # Whole hours from the raw timestamps.  timedelta.seconds only holds the
	    # sub-day remainder, so it under-reports logs spanning more than a day.
	    hours = int((last - first) // 3600)

	    print("Log started at %s" % datetime.datetime.fromtimestamp(first).strftime("%x %X"))
	    print("Last entry at %s" % datetime.datetime.fromtimestamp(last).strftime("%x %X"))
	    print("")
	    print("%d songs played in %d hours" % (total, hours))
	    # Clamp the divisor to one hour so that a log shorter than an hour does
	    # not raise ZeroDivisionError.
	    print("average %d songs per hour" % (total // max(hours, 1)))


	def pattern(filename, period=3600):
	    "Usage: %prog pattern songs_file\n\nLists songs played more than once per given period"
	    # The string above is left untouched on purpose: it is presumably what
	    # optfunc displays as the command's usage text (confirm against optfunc).
	    #
	    # Reads a log with one song per line made of four tab-separated fields
	    # (timestamp in seconds, artist, title, album).  Songs are grouped into
	    # windows: a new window opens at the first entry that is more than
	    # ``period`` seconds after the start of the current window.  Prints the
	    # play count of every song per window, then the songs played more than
	    # once over the whole log.  ``period`` is coerced with int().  A log
	    # without entries prints no window at all.  Always returns None.

	    period = int(period)

	    if not os.path.isfile(filename):
	        print("file not found: " + filename)
	        return

	    whitespace = re.compile(r'\s+')
	    windows = []   # (start timestamp, {song name: plays}) per window
	    overall = {}   # song name -> plays over the whole log

	    started = 0
	    ended = 0
	    total = 0

	    # The context manager closes the log even when a line fails to parse.
	    with open(filename, 'r') as log:
	        for line in log:
	            ts, artist, title, album = line.split("\t")
	            ts = float(ts)

	            if not windows:
	                started = ts
	                windows.append((ts, {}))
	            elif (ts - windows[-1][0]) > period:
	                windows.append((ts, {}))

	            # normalize artist/title
	            name = whitespace.sub(" ", "%s %s" % (artist, title)).lower()

	            # per period
	            plays = windows[-1][1]
	            plays[name] = plays.get(name, 0) + 1

	            # per overall
	            overall[name] = overall.get(name, 0) + 1

	            ended = ts
	            total += 1

	    # Float division: integer division made "%.2f" show 0.00 for any period
	    # shorter than an hour and skewed the pluralisation.
	    period_hours = period / 3600.0
	    plural = 's' if period_hours > 1 else ''
	    print("Number of time song played every %.2f hour%s" % (period_hours, plural))
	    print("")

	    for window_start, plays in windows:
	        # Label each window with the timestamp it really opened at, so the
	        # printed range matches the songs listed under it.
	        set_start = datetime.datetime.fromtimestamp(window_start).strftime("%X")
	        set_end = datetime.datetime.fromtimestamp(window_start + period).strftime("%X")
	        print("%s - %s" % (set_start, set_end))
	        for n, c in sortdval(plays, True):
	            print(" %s: %d" % (n, c))
	        print("")

	    print("Total songs played: %d in %d hours" % (total, (ended - started) / 60 / 60))
	    print("")

	    repeated = [(n, c) for (n, c) in sortdval(overall, True) if c > 1]
	    print("Songs played more than once overall (%d):" % len(repeated))
	    for (n, c) in repeated:
	        print(" %s: %d" % (n, c))


	# Script entry point: hand both command functions to optfunc (presumably it
	# selects one from the command line -- confirm against optfunc).
	if __name__ == '__main__':
	    optfunc.run([count, pattern])