Created
August 11, 2015 23:33
-
-
Save jberkus/a4457d40a758f7eca1e8 to your computer and use it in GitHub Desktop.
fake clickstream data generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import psycopg2 | |
import datetime | |
import random | |
import time | |
import sys | |
def randuser(max_id=100):
    """Return a random user id in the inclusive range 1..max_id.

    max_id -- largest id that may be returned (default 100, the value
              that used to be hard-coded).

    Exactly one value is drawn from random.random(), so for the default
    max_id a seeded generator yields the same ids as before.
    """
    # random.random() is in [0.0, 1.0), so the scaled, truncated value is
    # in 0..max_id-1; the +1 shifts it to 1..max_id.
    return int(random.random() * max_id) + 1
def randpage(max_id=100):
    """Return a random page id in the inclusive range 1..max_id.

    max_id -- largest id that may be returned (default 100, the value
              that used to be hard-coded).

    Exactly one value is drawn from random.random(), so for the default
    max_id a seeded generator yields the same ids as before.
    """
    # random.random() is in [0.0, 1.0), so the scaled, truncated value is
    # in 0..max_id-1; the +1 shifts it to 1..max_id.
    return int(random.random() * max_id) + 1
def writeseen(sessinfo, sessnum):
    """Insert one page-view row for a session into ``seenstream``.

    sessinfo -- mapping of session number to a dict with "user" and
                "page" keys
    sessnum  -- key of the session in sessinfo to record

    The row's timestamp is filled in by the database via now(). The insert
    is committed immediately using the module-level ``cur`` and ``conn``.
    Returns True on success. If the database reports an error the
    transaction is rolled back and the error is re-raised.
    """
    session = sessinfo[sessnum]
    try:
        cur.execute(
            "INSERT INTO seenstream ( user_id, page_id, ts ) VALUES ( %s, %s, now() )",
            (session["user"], session["page"]),
        )
        conn.commit()
    except psycopg2.Error:
        # A failed statement leaves the transaction aborted; roll back so
        # the connection is usable again if the caller decides to go on.
        conn.rollback()
        raise
    return True
def randsleep(max_seconds=1.0):
    """Pause for a random duration in [0, max_seconds) seconds.

    max_seconds -- upper bound of the pause (default 1.0, the value that
                   used to be implicit). A negative value makes
                   time.sleep() raise ValueError.

    Always returns True.
    """
    time.sleep(random.random() * max_seconds)
    return True
# Number of simulated sessions kept going at the same time.
NUM_SESSIONS = 4

# connect to database
conninfo = 'host=127.0.0.1 user=pipeline dbname=seen port=6543 password=pipeline'
conn = psycopg2.connect(conninfo)
cur = conn.cursor()

# Active sessions keyed by slot number; each value is a dict holding the
# session's "user" id and the "page" it is currently viewing.
tsession = {}

try:
    # loop forever, or at least until ctrl-c
    while True:
        # loop among four concurrent sessions (slots 1..NUM_SESSIONS)
        for s in range(1, NUM_SESSIONS + 1):
            if s not in tsession:
                # new session: random user landing on a random page
                tsession[s] = {"user": randuser(), "page": randpage()}
            else:
                pick = random.random()
                if pick < 0.1:
                    # terminate session (10% of the time); the slot is
                    # re-populated on a later pass
                    del tsession[s]
                    continue
                elif pick < 0.3:
                    # no activity (20% of the time)
                    continue
                else:
                    # pick page: same user moves to another page
                    tsession[s]["page"] = randpage()
            # Only reached for a new session or a page change.
            writeseen(tsession, s)
            randsleep()
except KeyboardInterrupt:
    # Parenthesised form works as a statement on Python 2 and as a
    # function call on Python 3.
    print("stream generation halted")
    sys.exit(1)
finally:
    # Release the database resources however the loop ended.
    cur.close()
    conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment