Skip to content

Instantly share code, notes, and snippets.

@jamesbursa
Created March 28, 2018 19:40
Show Gist options
  • Save jamesbursa/13c74b40e14c4835302b0fbe57be6275 to your computer and use it in GitHub Desktop.
Save jamesbursa/13c74b40e14c4835302b0fbe57be6275 to your computer and use it in GitHub Desktop.
Dimensional modeling demo database (for PostgreSQL)
#!/usr/bin/perl
use strict;
use warnings;
use POSIX qw(strftime);
my @BRANDS = qw(Acme Best Costlo Deluxe Essential Fresh Great Harvest Indigo);
my @PRODUCTS = qw(Almonds Eggs Milk Apple Pear Orange Carrot Pepper Bread
Croissant Chocolate Cola Potato Cream Tomato Chips
Cornflakes Granola Mozarella Cheddar Bacon);
my @CATEGORIES = qw(Snacks Sodas Dairy Seafood Bakery Fruit Vegetables);
my @DEPARTMENTS = qw(Grocery Pharmacy Cafe);
my @CITIES = qw(Springfield Cambridge Oxford Anytown Centerville);
my @STATES = qw(NY MA IL OH PA FL NC GA CA);
my $DATE_MAX = 20000;
open DATE_DIMENSION, ">", "date_dimension.tsv" or die "open failed: $!";
print DATE_DIMENSION "COPY date_dimension (date_key, year, month_number, month_name, day_of_month, full_date_description, weekday, year_month, iso_date) FROM stdin;\n";
for (my $d = 1; $d != $DATE_MAX; $d++) {
my $unix_time = $d * 60 * 60 * 24;
print DATE_DIMENSION strftime "$d\t%Y\t%-m\t%B\t%-d\t%x\t%A\t%Y-%m\t%F\n", gmtime $unix_time;
}
print DATE_DIMENSION "\\.\n";
my $PRODUCT_MAX = 10000;
open PRODUCT_DIMENSION, ">", "product_dimension.tsv" or die "open failed: $!";
print PRODUCT_DIMENSION "COPY product_dimension (product_key, sku_number, product_description, brand, category, department, package_size) FROM stdin;\n";
for (my $i = 1; $i != $PRODUCT_MAX; $i++) {
my $sku = 100000000 + int(rand(900000000));
my $brand = pick_random(\@BRANDS);
my $product = pick_random(\@PRODUCTS);
my $category = pick_random(\@CATEGORIES);
my $department = pick_random(\@DEPARTMENTS);
my $package_size = 1 + int(rand(10));
printf PRODUCT_DIMENSION "%i\t%i\t%s\t%s\t%s\t%s\t%s\n", $i, $sku,
"$brand $product", $brand, $category, $department, $package_size;
}
print PRODUCT_DIMENSION "\\.\n";
my $STORE_MAX = 100;
open STORE_DIMENSION, ">", "store_dimension.tsv" or die "open failed: $!";
print STORE_DIMENSION "COPY store_dimension (store_key, store_street_address, store_city, store_city_state, store_state, store_zip, square_footage) FROM stdin;\n";
for (my $i = 1; $i != $STORE_MAX; $i++) {
my $address_num = 1 + int(rand(1000));
my $address_st = chr(65 + int(rand(26)));
my $address = "$address_num $address_st Street";
my $city = pick_random(\@CITIES);
my $state = pick_random(\@STATES);
my $zip = 10000 + int(rand(90000));
my $sqft = 100 * int(rand(1000));
printf STORE_DIMENSION "%i\t%s\t%s\t%s\t%s\t%s\t%i\n", $i,
$address, $city, "$city, $state", $state, $zip, $sqft;
}
print STORE_DIMENSION "\\.\n";
open SALES_FACT, ">", "sales_fact.tsv" or die "open failed: $!";
print SALES_FACT "COPY sales_fact (date_key, product_key, store_key, sales_quantity, unit_price, extended_price) FROM stdin;\n";
for (my $i = 1; $i != 10000000; $i++) {
my $date_key = 1 + int(rand($DATE_MAX - 1));
my $product_key = 1 + int(rand($PRODUCT_MAX - 1));
my $store_key = 1 + int(rand($STORE_MAX - 1));
my $sales_quantity = 1 + int(rand(10));
my $unit_price = 1 + int(rand(10000));
my $extended_price = $sales_quantity * $unit_price;
printf SALES_FACT "%i\t%i\t%i\t%i\t%i\t%i\n",
$date_key, $product_key, $store_key, $sales_quantity, $unit_price, $extended_price;
}
print SALES_FACT "\\.\n";
sub pick_random {
my $picks = shift;
return $picks->[int(rand(scalar @$picks))];
}
#!/bin/bash
./generate_data.pl
dropdb dimensional_demo
createdb dimensional_demo
psql dimensional_demo --file=tables.sql
psql dimensional_demo --file=date_dimension.tsv
psql dimensional_demo --file=product_dimension.tsv
psql dimensional_demo --file=store_dimension.tsv
psql dimensional_demo --file=sales_fact.tsv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment