Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:00
Show Gist options
  • Save gerbal/6149be791ab1222320fd to your computer and use it in GitHub Desktop.
Save gerbal/6149be791ab1222320fd to your computer and use it in GitHub Desktop.
import random
import time
def randomMAC():
    """Return a random MAC address as a lowercase, colon-separated string.

    The first three octets are fixed to ``00:16:3e``. The fourth octet is
    drawn from 0x00-0x7f and the last two from 0x00-0xff (all inclusive).
    """
    octets = [
        0x00, 0x16, 0x3e,
        random.randint(0x00, 0x7f),
        random.randint(0x00, 0xff),
        random.randint(0x00, 0xff),
    ]
    # Two lowercase hex digits per octet, e.g. 0x0a -> "0a".
    return ":".join("%02x" % octet for octet in octets)
def randomIP():
    """Return a random dotted-quad IPv4 address string.

    The first octet is redrawn until it is not one of 10, 127, 169, 172 or
    192. Every octet is drawn from 1-255 inclusive, so no octet is ever 0.
    """
    excluded_first_octets = {10, 127, 169, 172, 192}
    first = random.randrange(1, 256)
    while first in excluded_first_octets:
        first = random.randrange(1, 256)
    # The remaining three octets are unrestricted within 1-255.
    remaining = [random.randrange(1, 256) for _ in range(3)]
    return ".".join(str(octet) for octet in [first] + remaining)
def strTimeProp(start, end, format, prop):
    """Get a time at a proportion of a range of two formatted times.

    start and end should be strings specifying times formatted in the
    given format (strftime-style), giving an interval [start, end].
    prop specifies the proportion of the interval to be taken after
    start (0 yields start, 1 yields end). The returned time will be in
    the specified format.
    """
    # Both endpoints are converted to seconds since the epoch (local time)
    # so the interpolation is plain arithmetic.
    stime = time.mktime(time.strptime(start, format))
    etime = time.mktime(time.strptime(end, format))
    ptime = stime + prop * (etime - stime)
    return time.strftime(format, time.localtime(ptime))
def randomDate(start, end, prop):
    """Return the time at proportion prop of the interval [start, end].

    Delegates to strTimeProp with the fixed format '%m/%d/%Y %I:%M %p',
    which is used both to parse start/end and to format the result.
    """
    date_format = '%m/%d/%Y %I:%M %p'
    return strTimeProp(start, end, date_format, prop)
# Pool of 80 random IP addresses; gen_visit picks a visit's address from it.
eightyIPs = [randomIP() for _ in range(80)]
# Vocabulary pools that the generator functions sample with random.choice().

# Third field of every site record built by gen_sites (all entries are empty).
hosts = ["", "", "", ""]

# Second field of every site record built by gen_sites.
site_owner = [
    "megacorp inc",
    "non-profic llc",
    "duetches web gbh",
]

# Domain suffixes; the trailing empty string yields a bare site name.
tld = [
    ".com",
    ".gov",
    ".edu",
    ".org",
    ".net",
    ".io",
    ".hobbies",
    ".icann-messed-up-with-the-new-gTLDs",
    ".bogus",
    ".su",
    "",
]

# Site names that gen_sites combines with an entry of tld.
sites = [
    "nile",
    "brootoople",
    "alohationary",
    "pinkinternets",
    "unc",
    "plums",
    "apples",
    "hotpants",
    "felinesrus",
    "email",
    "plantsforsale",
    "newtonianagronomy",
]

# Page-name suffixes; the trailing empty string yields an extensionless page.
file_extensions = [
    ".exe",
    ".php",
    ".html",
    ".asp",
    ".xml",
    ".pdf",
    ".?=1234",
    "",
]

# Page names that gen_pages draws from.
pages = [
    "search",
    "about",
    "index",
    "billing",
    "admin",
    "how-do-i-stop-the-burning",
    "sesamestreet",
    "courses",
    "watch",
    "404",
    "careers",
    "item",
    "pies",
    "shipping",
    "forks",
    "magicians-for-hire",
]

# "mobile" and "desktop" are listed twice, so random.choice picks them
# twice as often as "tablet".
device = ["mobile", "tablet", "desktop", "desktop", "mobile"]
browser = ["firefox", "chrome", "safari", "ie6", "mosaic"]
OS = ["windows", "iOS", "OSX", "Linux", "Android"]

# Per-visit attributes sampled by gen_visit.
referral = ["organic", "referral", "direct"]
city = [
    "springfield",
    "jonesboro",
    "paris",
    "miloud",
    "send-help",
    "southbend",
    "unicode-error",
    "townville",
    "citytown",
]
country = [
    "USA",
    "France",
    "Belize",
    "The Moon",
    "United Kingdom",
    "Germany",
    "Canada",
]
def gen_sites():
    """Generate website records with unique domain names.

    Each record is ``[domain, owner, host]``: the domain is a random entry
    of ``sites`` joined with a random entry of ``tld``, and the owner and
    host are random entries of ``site_owner`` and ``hosts``.

    Returns:
        A list of 11 records. The loop keeps going while the list holds 10
        or fewer entries, so it stops once the 11th is added.
    """
    return_sites = []
    seen_domains = set()
    while len(return_sites) <= 10:
        random_domain = random.choice(sites) + random.choice(tld)
        if random_domain in seen_domains:
            # Redraw so that every domain appears in exactly one record.
            continue
        seen_domains.add(random_domain)
        return_sites.append(
            [random_domain, random.choice(site_owner), random.choice(hosts)])
    return return_sites
def gen_pages(sites_list):
    """Generate 10 page records for every site in sites_list.

    Args:
        sites_list: site records as produced by gen_sites; only element 0
            (the domain) of each record is read.

    Returns:
        A flat list of ``[filename, page_name, domain]`` records, where
        filename is ``domain + "/" + page_name + extension``. Page names
        are unique within a site; extensions are drawn independently.

    Note:
        Needs at least 10 distinct entries in ``pages``, otherwise the
        inner loop cannot finish.
    """
    return_pages = []
    for a_site in sites_list:
        domain = a_site[0]
        used_names = set()
        site_pages = []
        while len(site_pages) < 10:
            page_name = random.choice(pages)
            page_ext = random.choice(file_extensions)
            if page_name in used_names:
                # Already used on this site; draw another name.
                continue
            used_names.add(page_name)
            filename = domain + "/" + page_name + page_ext
            site_pages.append([filename, page_name, domain])
        return_pages.extend(site_pages)
    return return_pages
def gen_devices():
    """Generate 50 random device records.

    Returns:
        A list of ``[mac, device, browser, os]`` records: the MAC address
        comes from randomMAC() and the other fields are random entries of
        the ``device``, ``browser`` and ``OS`` pools.
    """
    return_devices = []
    for _ in range(50):
        device_desc = [
            randomMAC(),
            random.choice(device),
            random.choice(browser),
            random.choice(OS),
        ]
        return_devices.append(device_desc)
    return return_devices
def gen_visit(devices_list, pages_list, sites_list):
    """Generate between 0 and 19 random visit records for every page.

    Args:
        devices_list: device records ``[mac, device, browser, os]``; one is
            picked at random for each visit.
        pages_list: page records; element 0 (the filename) is stored on
            each visit to that page.
        sites_list: accepted but not used by this function.

    Returns:
        A list of visit records.
    """
    return_visits = []
    # visit = date_time, duration, ip_address, traffic_source, city, country, device_id, browser, OS, filename
    for genned_page in pages_list:
        for _ in range(random.randrange(20)):
            random_device = random.choice(devices_list)
            visit_record = [
                randomDate("4/25/2014 12:00 am", "5/2/2014 5:00 pm",
                           random.random()),
                # Duration: exponential draw whose mean is itself a random
                # integer in 1-20, truncated to a whole number.
                str(int(random.expovariate(1.0 / random.randint(1, 20)))),
                random.choice(eightyIPs),
                random.choice(referral),
                random.choice(city),
                random.choice(country),
                random_device[0],
                random_device[2],
                random_device[3],
                genned_page[0],
            ]
            return_visits.append(visit_record)
    return return_visits
def output_sql(table_name, meta_array):
    """Render rows as SQL INSERT statements, one statement per row.

    Args:
        table_name: name of the target table, inserted verbatim.
        meta_array: iterable of rows; each row is an iterable of values.
            Every value is converted with str() and emitted as a
            single-quoted SQL string literal.

    Returns:
        The statements joined by newlines, each of the form
        ``INSERT INTO <table> VALUES('v1', 'v2', ...);``. An empty
        meta_array yields an empty string.
    """
    insert_prefix = "INSERT INTO " + table_name + " VALUES('"
    statements = []
    for row in meta_array:
        # Double any embedded single quote so it cannot end the literal early.
        values = [str(value).replace("'", "''") for value in row]
        statements.append(insert_prefix + "', '".join(values) + "');")
    return "\n".join(statements)
# Build the tables in dependency order: pages need sites, and visits need
# devices and pages.
db_sites_table = gen_sites()
db_pages_table = gen_pages(db_sites_table)
db_device_table = gen_devices()
db_visit_table = gen_visit(db_device_table, db_pages_table, db_sites_table)

websites_sql = output_sql("Website", db_sites_table)
webpage_sql = output_sql("web_Page", db_pages_table)
device_sql = output_sql("Device", db_device_table)
visit_sql = output_sql("visit", db_visit_table)

# Text mode, because the payload is str; the with-block closes the file
# even if the write fails.
with open("output.txt", "w") as populate_file:
    populate_file.write(websites_sql + "\n" +
                        webpage_sql + "\n" + device_sql + "\n" + visit_sql)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment