-
-
Save randyzwitch/c44ff2a76d81fa1e77cb to your computer and use it in GitHub Desktop.
Example (fake) transactions data generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R code
# Export the Groceries transactions data set that ships with the arules
# package to a plain text file. With arules' default "basket" format each
# output line is one transaction, and sep = "," separates the items on it.
library(arules)
data("Groceries")
write(Groceries, "groceries.txt", sep = ",")
# Python code
# Generate fake per-customer transaction files by combining Faker-generated
# values with real item baskets exported from the arules Groceries data set.
import csv
import os
import random

import pandas as pd
from faker import Faker
from pandas import DataFrame

# Number of fake customers; one output file is written per customer.
NUM_CUSTOMERS = 1234567
# Basket file produced by the R snippet (write(Groceries, "groceries.txt", ...)).
TRANSACTIONS_FILE = "groceries.txt"
# Directory that receives one custfile_<cust_id> file per customer.
OUTPUT_DIR = "/transactions"
# Upper bound on the number of transactions generated for a single customer.
MAX_TRANSACTIONS_PER_CUSTOMER = 365

fake = Faker()

# Fail fast: check the cheap prerequisites (output directory, input file)
# before spending time generating more than a million fake profiles.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Read in the transactions file exported from the arules package.
# Each line is one basket; strip only the newline so a final line without a
# trailing newline does not lose its last character, and skip blank lines.
with open(TRANSACTIONS_FILE) as f:
    transactions = [line.rstrip("\n") for line in f if line.strip()]

if not transactions:
    raise ValueError("no transactions found in %s" % TRANSACTIONS_FILE)

# Create a customer table of NUM_CUSTOMERS customers with fake data.
# Use the dataframe index as a way to generate a unique customer id.
# NOTE(review): customer_df is built but not written anywhere in this script.
customers = [fake.simple_profile() for _ in range(NUM_CUSTOMERS)]
customer_df = pd.DataFrame(customers)
customer_df["cust_id"] = customer_df.index

# Generate transactions by cust_id
# file format (space-delimited, one row per transaction):
#   cust_id::int
#   store_id::int              (a fake zip code stands in for the store id)
#   transaction_datetime::string/datetime
#   items::string              (one basket line from the transactions file)

# for each customer...
for cust_id in range(NUM_CUSTOMERS):
    # ...create a file...
    out_path = os.path.join(OUTPUT_DIR, "custfile_%s" % cust_id)
    # newline="" lets the csv module control line endings itself.
    with open(out_path, "w", newline="") as csvfile:
        trans = csv.writer(
            csvfile, delimiter=" ", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        # ...that contains all of the transactions they've ever made:
        # between 1 and MAX_TRANSACTIONS_PER_CUSTOMER rows, never zero.
        for _ in range(random.randint(1, MAX_TRANSACTIONS_PER_CUSTOMER)):
            trans.writerow([
                cust_id,
                fake.zipcode(),
                fake.date_time_this_decade(before_now=True, after_now=False),
                random.choice(transactions),
            ])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment