Skip to content

Instantly share code, notes, and snippets.

@randyzwitch
Last active March 9, 2016 22:36
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save randyzwitch/c44ff2a76d81fa1e77cb to your computer and use it in GitHub Desktop.
Save randyzwitch/c44ff2a76d81fa1e77cb to your computer and use it in GitHub Desktop.
Example (fake) transactions data generator
# R code: export the arules "Groceries" example data to a text file
# so the Python section of this gist can read it.
library(arules)
# Load the Groceries dataset into the R session.
data("Groceries")
# Write the dataset to groceries.txt using "," as the separator.
# NOTE(review): presumably one basket per line, since the Python code
# reads the export line by line - confirm against the arules docs.
write(Groceries, "groceries.txt", sep = ",")
#Python Code
import random, csv
from faker import Faker
fake = Faker()
from pandas import DataFrame
import pandas as pd
# Build the customer table: 1,234,567 records, each produced by one call
# to fake.simple_profile().
customers = [fake.simple_profile() for _ in range(1234567)]
customer_df = DataFrame(customers)
# Reuse the frame's row index as the unique customer id column.
customer_df["cust_id"] = customer_df.index
# Read in the transactions file exported from the arules package.
# The R snippet in this file writes "groceries.txt", so read that same name.
# Each line is kept as one opaque "items" string.
with open("groceries.txt") as f:
    # Strip only the line terminator. Slicing off the last character would
    # truncate the final record when the file has no trailing newline.
    transactions = [line.rstrip("\r\n") for line in f]
# Generate transactions by cust_id.
# One output file per customer; each row holds four space-delimited fields:
#   cust_id::int
#   store_id::int                          (filled from fake.zipcode())
#   transaction_datetime::string/datetime  (fake.date_time_this_decade())
#   items::string                          (one random entry of `transactions`)
# NOTE: the '/transactions' directory must already exist and be writable;
# open() does not create it.
# The customer count below must match the number of customers generated above.
for i in range(0, 1234567):
    # ...create a file for this customer...
    with open('/transactions/custfile_%s' % i, 'w') as csvfile:
        trans = csv.writer(csvfile, delimiter=' ', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # ...that contains all of the transactions they've ever made.
        # random.randint is inclusive on both ends, so this writes 1-365 rows.
        # The previous range(1, n) wrote n - 1 rows (0-364), which could
        # leave a customer file empty.
        for _ in range(random.randint(1, 365)):
            trans.writerow([
                i,
                fake.zipcode(),
                fake.date_time_this_decade(before_now=True, after_now=False),
                random.choice(transactions),
            ])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment