Skip to content

Instantly share code, notes, and snippets.

@kenelliott
Created September 28, 2012 19:47
Show Gist options
  • Save kenelliott/3801763 to your computer and use it in GitHub Desktop.
Save kenelliott/3801763 to your computer and use it in GitHub Desktop.
random data
package BQ::OSS::Data::Warehouse::Build;
##############################################
# This package verifies and build extraction jobs
# based on xml file
#
# Job:
# Load XML file
# Check table for consistency
# Get last run info from DW extraction_jobs
# Populate bqdw.extraction_jobs
#
# TODO: Add database xml type attribute for active and inactive and use entire database list as a check this enables the xml list to include active and inactive databases
#
# get the jobs log for each database
# check against job log defined in xml
# if job log exists for job requested from xml
# add a new extraction job from the end of the last extraction job
# NOTE: should the previous job extraction status effect the creation of a new job?
# if a job log does not exist for the job requested from xml
# add a new extraction job from the beginning to a variable position
# NOTE: variable position should be defined based on previous extraction log elapsed seconds
#
##############################################
use strict;
use Site::MyDBI;
use XML::Simple;
use Data::Dumper;
use Site::MyConfig;
use base qw(BQ::OSS::Data::Warehouse::Utils);
use base qw(Site::Errors);
sub new
{
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {
REQ => undef,
RSP => undef,
CONFIG => {
ACTIVE_DATABASES => undef,
XML_FILE => undef,
XML_DS => undef,
DATABASES_DS => undef,
TABLES_DS => undef,
},
CONNECTIONS => {
BQ_DBI => undef,
DW_DBI => undef,
CLEC_DBI => undef,
},
};
bless ($self, $class);
# BQ database connection
$self->myDBI(Site::MyDBI->new);
# BQDW database connection
# TODO: find right way to ip
$self->get_bqdwDBI(Site::MyDBI->new('bqdw','192.168.101.15',undef,undef));
return $self;
}
##############################################
# run build sequence
##############################################
sub run
{
my $self = shift;
my $xml_file = shift;
if($self->parse_xml($xml_file)) {
foreach my $database(@{$self->get_databases_ds}) {
my $host = Site::MyConfig->getDBHost($database);
my $user = Site::MyConfig->getDBUser($database);
my $pass = Site::MyConfig->getDBPasswd($database);
my $port = Site::MyConfig->getStunnelPort(Site::MyConfig->getCluster($database)); # TODO: is this needed for production?
my $temp_dbh = $self->get_clecDBI(
Site::MyDBI->new(
Site::MyConfig->getDBHost($database),
$host,$user,$pass));
}
} else {
$self->add_internal_error({
DESCRIPTION => $_,
MESSAGE => 'Data Warehouse build failed because the XML FILE could not be parsed.',
});
return undef;
}
}
##############################################
# Request and Response
##############################################
sub request
{
my $self = shift;
if (@_) { $self->{REQ} = shift }
return $self->{REQ};
}
sub response
{
my $self = shift;
if (@_) { $self->{RSP} = shift }
return $self->{RSP};
}
##############################################
# Get/Set Database connections
##############################################
sub myDBI
{
my $self = shift;
if (@_) { $self->{CONNECTIONS}->{BQ_DBI} = shift }
return $self->{CONNECTIONS}->{BQ_DBI};
}
sub get_bqdwDBI
{
my $self = shift;
if (@_) { $self->{CONNECTIONS}->{BQDW_DBI} = shift }
return $self->{CONNECTIONS}->{BQDW_DBI};
}
sub get_clecDBI
{
my $self = shift;
if (@_) { $self->{CONNECTIONS}->{CLEC_DBI} = shift }
return $self->{CONNECTIONS}->{CLEC_DBI};
}
##############################################
# Get/Set Configuration Options
##############################################
sub get_xml_file
{
my $self = shift;
if(@_) { $self->{CONFIG}->{XML_FILE} = shift }
return $self->{CONFIG}->{XML_FILE};
}
sub get_xml_ds
{
my $self = shift;
if(@_) { $self->{CONFIG}->{XML_DS} = shift }
return $self->{CONFIG}->{XML_DS};
}
sub get_databases_ds
{
my $self = shift;
if(@_) { $self->{CONFIG}->{DATABASE_DS} = shift }
return $self->{CONFIG}->{DATABASE_DS};
}
sub get_tables_ds
{
my $self = shift;
if(@_) { $self->{CONFIG}->{TABLES_DS} = shift }
return $self->{CONFIG}->{TABLES_DS};
}
sub get_active_databases
{
my $self = shift;
# return list if it exists otherwise generate
if(lc(ref($self->{CONFIG}->{ACTIVE_DATABASES})) eq 'array') {
return $self->{CONFIG}->{ACTIVE_DATABASES};
}
# connect and set vars
my @database_list;
# get only active clec databases order from oldest to newest
my $sth = $self->myDBI->select("select databasename from clecprofile where status = 'ACTIVE' order by databasename asc");
my $row_count = $sth->numrows;
# get the database names
for (my $row = 0; $row < $row_count; $row++) {
my $data_hash = $sth->fetchrow($row);
push(@database_list,$data_hash->{'databasename'});
}
# assign to object
$self->{CONFIG}->{ACTIVE_DATABASES} = \@database_list;
# return the scalar hash reference
return $self->{CONFIG}->{ACTIVE_DATABASES};
}
##############################################
# Parse the data warehouse xml file
# Format:
# <bqdw>
# <databases type='all || exclude || include'><database>QT101</database></databases>
# <tables>
# <table name='ldusage'>
# <col type='int'>id</col>
# <col type='varchar(255)'>location</col>
# </table>
# ...
# </tables>
# </bqdw>
#
# Note: to use another source besides xml, just assign array references to get_tables_ds and get_databases_ds
#
##############################################
sub parse_xml
{
my $self = shift;
my $schema = {};
if($self->get_tables_ds && $self->get_databases_ds) { return 1; } # already defined somewhere else
# assign the xmldoc if passed here
if(@_) { $self->get_xml_file(shift) }
# source the xml
my $source_xml = XMLin($self->get_xml_file);
my $active_databases = $self->get_active_databases;
# create database run list from xml source document
# list type can be all/include/exclude
my $db_define_type = $source_xml->{databases}->{type};
if(lc($db_define_type) eq 'include') {
# check against database_names
# select only databases that are in the xml database list and the active clec database list
# TODO: internal error on xml databases not in active list
$schema->{databases} = $self->compare_arrays($active_databases,$source_xml->{databases}->{database},'intersect');
} elsif(lc($db_define_type) eq 'exclude') {
# check against database_names
# prune elements from database_list
$schema->{databases} = $self->compare_arrays($active_databases,$source_xml->{databases}->{database},'union');
} else {
# run all
$schema->{databases} = $active_databases;
}
# parse the tables from the xml source document
my @table_names = keys %{$source_xml->{tables}->{table}};
foreach my $table(@table_names) {
foreach my $col (values @{$source_xml->{tables}->{table}->{$table}->{col}}) {
$schema->{tables}->{$table}->{$col->{content}} = $col->{type};
}
}
# set build configuration
$self->get_tables_ds($schema->{tables});
$self->get_databases_ds($schema->{databases});
return 1;
}
return 1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment