Created
March 28, 2018 19:40
-
-
Save jamesbursa/13c74b40e14c4835302b0fbe57be6275 to your computer and use it in GitHub Desktop.
Dimensional modeling demo database (for PostgreSQL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use POSIX qw(strftime); | |
my @BRANDS = qw(Acme Best Costlo Deluxe Essential Fresh Great Harvest Indigo); | |
my @PRODUCTS = qw(Almonds Eggs Milk Apple Pear Orange Carrot Pepper Bread | |
Croissant Chocolate Cola Potato Cream Tomato Chips | |
Cornflakes Granola Mozarella Cheddar Bacon); | |
my @CATEGORIES = qw(Snacks Sodas Dairy Seafood Bakery Fruit Vegetables); | |
my @DEPARTMENTS = qw(Grocery Pharmacy Cafe); | |
my @CITIES = qw(Springfield Cambridge Oxford Anytown Centerville); | |
my @STATES = qw(NY MA IL OH PA FL NC GA CA); | |
my $DATE_MAX = 20000; | |
open DATE_DIMENSION, ">", "date_dimension.tsv" or die "open failed: $!"; | |
print DATE_DIMENSION "COPY date_dimension (date_key, year, month_number, month_name, day_of_month, full_date_description, weekday, year_month, iso_date) FROM stdin;\n"; | |
for (my $d = 1; $d != $DATE_MAX; $d++) { | |
my $unix_time = $d * 60 * 60 * 24; | |
print DATE_DIMENSION strftime "$d\t%Y\t%-m\t%B\t%-d\t%x\t%A\t%Y-%m\t%F\n", gmtime $unix_time; | |
} | |
print DATE_DIMENSION "\\.\n"; | |
my $PRODUCT_MAX = 10000; | |
open PRODUCT_DIMENSION, ">", "product_dimension.tsv" or die "open failed: $!"; | |
print PRODUCT_DIMENSION "COPY product_dimension (product_key, sku_number, product_description, brand, category, department, package_size) FROM stdin;\n"; | |
for (my $i = 1; $i != $PRODUCT_MAX; $i++) { | |
my $sku = 100000000 + int(rand(900000000)); | |
my $brand = pick_random(\@BRANDS); | |
my $product = pick_random(\@PRODUCTS); | |
my $category = pick_random(\@CATEGORIES); | |
my $department = pick_random(\@DEPARTMENTS); | |
my $package_size = 1 + int(rand(10)); | |
printf PRODUCT_DIMENSION "%i\t%i\t%s\t%s\t%s\t%s\t%s\n", $i, $sku, | |
"$brand $product", $brand, $category, $department, $package_size; | |
} | |
print PRODUCT_DIMENSION "\\.\n"; | |
my $STORE_MAX = 100; | |
open STORE_DIMENSION, ">", "store_dimension.tsv" or die "open failed: $!"; | |
print STORE_DIMENSION "COPY store_dimension (store_key, store_street_address, store_city, store_city_state, store_state, store_zip, square_footage) FROM stdin;\n"; | |
for (my $i = 1; $i != $STORE_MAX; $i++) { | |
my $address_num = 1 + int(rand(1000)); | |
my $address_st = chr(65 + int(rand(26))); | |
my $address = "$address_num $address_st Street"; | |
my $city = pick_random(\@CITIES); | |
my $state = pick_random(\@STATES); | |
my $zip = 10000 + int(rand(90000)); | |
my $sqft = 100 * int(rand(1000)); | |
printf STORE_DIMENSION "%i\t%s\t%s\t%s\t%s\t%s\t%i\n", $i, | |
$address, $city, "$city, $state", $state, $zip, $sqft; | |
} | |
print STORE_DIMENSION "\\.\n"; | |
open SALES_FACT, ">", "sales_fact.tsv" or die "open failed: $!"; | |
print SALES_FACT "COPY sales_fact (date_key, product_key, store_key, sales_quantity, unit_price, extended_price) FROM stdin;\n"; | |
for (my $i = 1; $i != 10000000; $i++) { | |
my $date_key = 1 + int(rand($DATE_MAX - 1)); | |
my $product_key = 1 + int(rand($PRODUCT_MAX - 1)); | |
my $store_key = 1 + int(rand($STORE_MAX - 1)); | |
my $sales_quantity = 1 + int(rand(10)); | |
my $unit_price = 1 + int(rand(10000)); | |
my $extended_price = $sales_quantity * $unit_price; | |
printf SALES_FACT "%i\t%i\t%i\t%i\t%i\t%i\n", | |
$date_key, $product_key, $store_key, $sales_quantity, $unit_price, $extended_price; | |
} | |
print SALES_FACT "\\.\n"; | |
sub pick_random { | |
my $picks = shift; | |
return $picks->[int(rand(scalar @$picks))]; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
./generate_data.pl | |
dropdb dimensional_demo | |
createdb dimensional_demo | |
psql dimensional_demo --file=tables.sql | |
psql dimensional_demo --file=date_dimension.tsv | |
psql dimensional_demo --file=product_dimension.tsv | |
psql dimensional_demo --file=store_dimension.tsv | |
psql dimensional_demo --file=sales_fact.tsv |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment