Skip to content

Instantly share code, notes, and snippets.

@jberkus
Created August 11, 2015 23:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jberkus/a4457d40a758f7eca1e8 to your computer and use it in GitHub Desktop.
Save jberkus/a4457d40a758f7eca1e8 to your computer and use it in GitHub Desktop.
fake clickstream data generator
#!/usr/bin/env python
import psycopg2
import datetime
import random
import time
import sys
def randuser(max_id=100):
    """Return a random user id.

    Args:
        max_id: Largest id that may be returned (inclusive). Defaults to
            100, the range the generator has always used.

    Returns:
        An int chosen uniformly from 1..max_id.

    Raises:
        ValueError: If max_id is less than 1 (raised by random.randint).
    """
    return random.randint(1, max_id)
def randpage(max_id=100):
    """Return a random page id.

    Args:
        max_id: Largest id that may be returned (inclusive). Defaults to
            100, the range the generator has always used.

    Returns:
        An int chosen uniformly from 1..max_id.

    Raises:
        ValueError: If max_id is less than 1 (raised by random.randint).
    """
    return random.randint(1, max_id)
def writeseen(sessinfo, sessnum):
    """Record one page view for a session in the ``seenstream`` table.

    Uses the module-level ``cur`` and ``conn`` objects; each call inserts a
    single row and commits it immediately.

    Args:
        sessinfo: Mapping of session number to a dict holding at least the
            keys "user" and "page".
        sessnum: Key of the session in ``sessinfo`` whose current user/page
            pair should be written.

    Returns:
        True once the row has been committed.

    Raises:
        KeyError: If ``sessnum`` is not in ``sessinfo`` or the session dict
            lacks "user" or "page".
    """
    session = sessinfo[sessnum]
    # The timestamp is filled in by the database's now(), so no client-side
    # timestamp is computed here.
    cur.execute("INSERT INTO seenstream ( user_id, page_id, ts ) VALUES ( %s, %s, now() )",
                (session["user"], session["page"],))
    conn.commit()
    return True
def randsleep(max_seconds=1.0):
    """Pause for a random amount of time.

    Args:
        max_seconds: Upper bound (exclusive) of the pause, in seconds.
            Defaults to 1.0, the bound the generator has always used.

    Returns:
        True after the pause has elapsed.

    Raises:
        ValueError: If max_seconds is negative (raised by time.sleep).
    """
    # random.random() is in [0.0, 1.0), so the pause is in [0, max_seconds).
    time.sleep(random.random() * max_seconds)
    return True
# connect to database
# NOTE(review): connection parameters (including the password) are
# hard-coded; fine for a local demo, worth externalizing otherwise.
conninfo = 'host=127.0.0.1 user=pipeline dbname=seen port=6543 password=pipeline'
conn = psycopg2.connect(conninfo)
cur = conn.cursor()

# number of simulated concurrent sessions; sessions are keyed 1..NUM_SESSIONS
NUM_SESSIONS = 4

# session number -> {"user": user_id, "page": page_id} for each live session
tsession = {}

try:
    # loop forever, or at least until ctrl-c
    while True:
        # loop among four concurrent sessions
        for s in range(1, NUM_SESSIONS + 1):
            if s not in tsession:
                # new session
                tsession[s] = {"user": randuser(), "page": randpage()}
            else:
                pick = random.random()
                if pick < 0.1:
                    # terminate session (10% of the time); nothing is written
                    del tsession[s]
                    continue
                elif pick < 0.3:
                    # no activity (20% of the time); nothing is written
                    continue
                else:
                    # pick page: same user moves to a new random page
                    tsession[s]["page"] = randpage()
            # reached only for new sessions and page changes
            writeseen(tsession, s)
            randsleep()
except KeyboardInterrupt:
    # call form of print works on both Python 2 and Python 3
    print("stream generation halted")
    sys.exit(1)
finally:
    # runs on ctrl-c (sys.exit raises SystemExit) and on any other error,
    # so the cursor and connection are always released
    cur.close()
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment