Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
fake clickstream data generator
#!/usr/bin/env python
import psycopg2
import datetime
import random
import time
import sys
def randuser(max_id=100):
    """Return a pseudo-random user id between 1 and ``max_id`` inclusive.

    Args:
        max_id: Largest id that may be returned. Defaults to 100, which
            keeps the original fixed range of 1..100.

    Returns:
        An ``int`` in the range ``1..max_id``.
    """
    # random.random() lies in [0.0, 1.0), so the truncated product lies in
    # 0..max_id-1; adding 1 shifts the range so that ids start at 1.
    return int(random.random() * max_id) + 1
def randpage(max_page=100):
    """Return a pseudo-random page id between 1 and ``max_page`` inclusive.

    Args:
        max_page: Largest page id that may be returned. Defaults to 100,
            which keeps the original fixed range of 1..100.

    Returns:
        An ``int`` in the range ``1..max_page``.
    """
    # random.random() lies in [0.0, 1.0), so the truncated product lies in
    # 0..max_page-1; adding 1 shifts the range so that ids start at 1.
    return int(random.random() * max_page) + 1
def writeseen(sessinfo, sessnum):
    """Insert one page-view row for a session into ``seenstream``.

    Uses the module-level ``cur`` cursor and ``conn`` connection. The row's
    timestamp is generated by the database (``now()``), not by this process.

    Args:
        sessinfo: Mapping of session number to a dict holding the ``"user"``
            and ``"page"`` ids of that session.
        sessnum: Key of the session in ``sessinfo`` to record.

    Returns:
        ``True`` once the insert has been committed.

    Raises:
        KeyError: If ``sessnum`` is not in ``sessinfo`` or the session dict
            lacks ``"user"`` / ``"page"``.
        psycopg2.Error: If the insert or commit fails; the transaction is
            rolled back before the error is re-raised.
    """
    session = sessinfo[sessnum]
    try:
        cur.execute("INSERT INTO seenstream ( user_id, page_id, ts ) VALUES ( %s, %s, now() )",
                    (session["user"], session["page"],))
        conn.commit()
    except psycopg2.Error:
        # A failed statement leaves the transaction aborted; roll back so the
        # shared connection stays usable if a caller handles the error.
        conn.rollback()
        raise
    return True
def randsleep(max_seconds=1.0):
    """Pause for a random duration to space out generated events.

    Args:
        max_seconds: Exclusive upper bound of the pause, in seconds.
            Defaults to 1.0, which keeps the original delay of
            ``random.random()`` seconds.

    Returns:
        ``True`` after the pause has elapsed.

    Raises:
        ValueError: Propagated from ``time.sleep`` if the computed delay is
            negative (i.e. ``max_seconds`` is negative).
    """
    # random.random() is in [0.0, 1.0); scaling by 1.0 leaves it unchanged.
    time.sleep(random.random() * max_seconds)
    return True
# connect to database
# NOTE(review): credentials are hard-coded; presumably a local demo database.
conninfo = 'host=127.0.0.1 user=pipeline dbname=seen port=6543 password=pipeline'
conn = psycopg2.connect(conninfo)
cur = conn.cursor()

# number of simulated sessions, keyed 1..SESSION_COUNT in tsession
SESSION_COUNT = 4
# session number -> {"user": user_id, "page": page_id} for each live session
tsession = {}

try:
    # loop forever, or at least until ctrl-c
    while True:
        # loop among four concurrent sessions; range() excludes its stop
        # value, so the stop must be SESSION_COUNT + 1 to reach session 4
        for s in range(1, SESSION_COUNT + 1):
            if s not in tsession:
                # new session: a random user landing on a random page
                tsession[s] = {"user": randuser(),
                               "page": randpage()}
            else:
                pick = random.random()
                if pick < 0.1:
                    # terminate session (pick in [0.0, 0.1))
                    del tsession[s]
                    continue
                elif pick < 0.3:
                    # no activity (pick in [0.1, 0.3))
                    continue
                else:
                    # pick page: same user moves to another random page
                    tsession[s]["page"] = randpage()
            # NOTE(review): the original nesting was lost; these two calls
            # are assumed to run once per active session, since the
            # `continue` statements above only make sense that way.
            writeseen(tsession, s)
            randsleep()
except KeyboardInterrupt:
    # single-argument parenthesised print behaves the same on Python 2 and 3
    print("stream generation halted")
    sys.exit(1)
finally:
    # release the cursor and connection on every exit path, including the
    # SystemExit raised by sys.exit() above
    cur.close()
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.