huseyinyilmaz/common.py

## common.py
from collections import defaultdict
from collections import Counter
from itertools import izip
from itertools import islice
from operator import itemgetter
import datetime
import random

event_length = 1000000
WIN_SIZE = 3


def get_events():
    event_types = range(20)
    return [random.choice(event_types) for i in range(event_length)]


def original(events, most_common):
    counter = defaultdict(int)
    for index in range(len(events) - WIN_SIZE + 1):
        window = tuple(events[index: index + WIN_SIZE])
        counter[window] += 1

    return sorted(counter.items(),
                  key=itemgetter(1),
                  reverse=True)[:most_common]


def seq(events, most_common):
    counter = Counter(
        izip(*[islice(events, i, None) for i in range(WIN_SIZE)]))
    return counter.most_common(most_common)


def start():
    most_common = 4
    events = get_events()

    dt1 = datetime.datetime.now()
    result = seq(events, most_common)
    dt2 = datetime.datetime.now()
    seq_time = dt2 - dt1
    print result
    print 'result = %s' % result
    print 'seq_time = %s' % seq_time

    dt1 = datetime.datetime.now()
    result = original(events, most_common)
    dt2 = datetime.datetime.now()
    org_time = dt2 - dt1
    print 'original_time = %s' % org_time
    print 'result = %s' % result

if __name__ == '__main__':
    start()

# [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]
# result = [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]
# seq_time = 0:00:00.604561
# original_time = 0:00:00.771838
# result = [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]

## profile.txt
Line #    Mem usage    Increment   Line Contents
================================================
    35    8.047 MiB    0.000 MiB   @profile
    36                             def start():
    37    8.070 MiB    0.023 MiB       most_common = 4
    38   46.812 MiB   38.742 MiB       events = get_events()
    39
    40   46.812 MiB    0.000 MiB       dt1 = datetime.datetime.now()
    41   47.617 MiB    0.805 MiB       result = seq(events, most_common)
    42   47.625 MiB    0.008 MiB       dt2 = datetime.datetime.now()
    43   47.633 MiB    0.008 MiB       seq_time = dt2 - dt1
    44   47.633 MiB    0.000 MiB       print result
    45   47.633 MiB    0.000 MiB       print 'result = %s' % result
    46   47.633 MiB    0.000 MiB       print 'seq_time = %s' % seq_time
    47
    48   47.633 MiB    0.000 MiB       dt1 = datetime.datetime.now()
    49   48.477 MiB    0.844 MiB       result = original(events, most_common)
    50   48.484 MiB    0.008 MiB       dt2 = datetime.datetime.now()
    51   48.484 MiB    0.000 MiB       org_time = dt2 - dt1
    52   48.484 MiB    0.000 MiB       print 'original_time = %s' % org_time
    53   48.484 MiB    0.000 MiB       print 'result = %s' % result
	from collections import defaultdict
	from collections import Counter
	from itertools import izip
	from itertools import islice
	from operator import itemgetter
	import datetime
	import random

	event_length = 1000000
	WIN_SIZE = 3


	def get_events():
	event_types = range(20)
	return [random.choice(event_types) for i in range(event_length)]


	def original(events, most_common):
	counter = defaultdict(int)
	for index in range(len(events) - WIN_SIZE + 1):
	window = tuple(events[index: index + WIN_SIZE])
	counter[window] += 1

	return sorted(counter.items(),
	key=itemgetter(1),
	reverse=True)[:most_common]


	def seq(events, most_common):
	counter = Counter(
	izip(*[islice(events, i, None) for i in range(WIN_SIZE)]))
	return counter.most_common(most_common)


	def start():
	most_common = 4
	events = get_events()

	dt1 = datetime.datetime.now()
	result = seq(events, most_common)
	dt2 = datetime.datetime.now()
	seq_time = dt2 - dt1
	print result
	print 'result = %s' % result
	print 'seq_time = %s' % seq_time

	dt1 = datetime.datetime.now()
	result = original(events, most_common)
	dt2 = datetime.datetime.now()
	org_time = dt2 - dt1
	print 'original_time = %s' % org_time
	print 'result = %s' % result

	if __name__ == '__main__':
	start()

	# [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]
	# result = [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]
	# seq_time = 0:00:00.604561
	# original_time = 0:00:00.771838
	# result = [((8, 11, 19), 171), ((12, 7, 16), 165), ((1, 5, 15), 164), ((18, 6, 9), 164)]
	Line # Mem usage Increment Line Contents
	================================================
	35 8.047 MiB 0.000 MiB @profile
	36 def start():
	37 8.070 MiB 0.023 MiB most_common = 4
	38 46.812 MiB 38.742 MiB events = get_events()
	39
	40 46.812 MiB 0.000 MiB dt1 = datetime.datetime.now()
	41 47.617 MiB 0.805 MiB result = seq(events, most_common)
	42 47.625 MiB 0.008 MiB dt2 = datetime.datetime.now()
	43 47.633 MiB 0.008 MiB seq_time = dt2 - dt1
	44 47.633 MiB 0.000 MiB print result
	45 47.633 MiB 0.000 MiB print 'result = %s' % result
	46 47.633 MiB 0.000 MiB print 'seq_time = %s' % seq_time
	47
	48 47.633 MiB 0.000 MiB dt1 = datetime.datetime.now()
	49 48.477 MiB 0.844 MiB result = original(events, most_common)
	50 48.484 MiB 0.008 MiB dt2 = datetime.datetime.now()
	51 48.484 MiB 0.000 MiB org_time = dt2 - dt1
	52 48.484 MiB 0.000 MiB print 'original_time = %s' % org_time
	53 48.484 MiB 0.000 MiB print 'result = %s' % result