Created
September 28, 2012 19:47
-
-
Save kenelliott/3801763 to your computer and use it in GitHub Desktop.
random data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package BQ::OSS::Data::Warehouse::Build; | |
############################################## | |
# This package verifies and build extraction jobs | |
# based on xml file | |
# | |
# Job: | |
# Load XML file | |
# Check table for consistency | |
# Get last run info from DW extraction_jobs | |
# Populate bqdw.extraction_jobs | |
# | |
# TODO: Add database xml type attribute for active and inactive and use entire database list as a check this enables the xml list to include active and inactive databases | |
# | |
# get the jobs log for each database | |
# check against job log defined in xml | |
# if job log exists for job requested from xml | |
# add a new extraction job from the end of the last extraction job | |
# NOTE: should the previous job extraction status effect the creation of a new job? | |
# if a job log does not exist for the job requested from xml | |
# add a new extraction job from the beginning to a variable position | |
# NOTE: variable position should be defined based on previous extraction log elapsed seconds | |
# | |
############################################## | |
use strict; | |
use Site::MyDBI; | |
use XML::Simple; | |
use Data::Dumper; | |
use Site::MyConfig; | |
use base qw(BQ::OSS::Data::Warehouse::Utils); | |
use base qw(Site::Errors); | |
sub new | |
{ | |
my $proto = shift; | |
my $class = ref($proto) || $proto; | |
my $self = { | |
REQ => undef, | |
RSP => undef, | |
CONFIG => { | |
ACTIVE_DATABASES => undef, | |
XML_FILE => undef, | |
XML_DS => undef, | |
DATABASES_DS => undef, | |
TABLES_DS => undef, | |
}, | |
CONNECTIONS => { | |
BQ_DBI => undef, | |
DW_DBI => undef, | |
CLEC_DBI => undef, | |
}, | |
}; | |
bless ($self, $class); | |
# BQ database connection | |
$self->myDBI(Site::MyDBI->new); | |
# BQDW database connection | |
# TODO: find right way to ip | |
$self->get_bqdwDBI(Site::MyDBI->new('bqdw','192.168.101.15',undef,undef)); | |
return $self; | |
} | |
############################################## | |
# run build sequence | |
############################################## | |
sub run | |
{ | |
my $self = shift; | |
my $xml_file = shift; | |
if($self->parse_xml($xml_file)) { | |
foreach my $database(@{$self->get_databases_ds}) { | |
my $host = Site::MyConfig->getDBHost($database); | |
my $user = Site::MyConfig->getDBUser($database); | |
my $pass = Site::MyConfig->getDBPasswd($database); | |
my $port = Site::MyConfig->getStunnelPort(Site::MyConfig->getCluster($database)); # TODO: is this needed for production? | |
my $temp_dbh = $self->get_clecDBI( | |
Site::MyDBI->new( | |
Site::MyConfig->getDBHost($database), | |
$host,$user,$pass)); | |
} | |
} else { | |
$self->add_internal_error({ | |
DESCRIPTION => $_, | |
MESSAGE => 'Data Warehouse build failed because the XML FILE could not be parsed.', | |
}); | |
return undef; | |
} | |
} | |
############################################## | |
# Request and Response | |
############################################## | |
sub request | |
{ | |
my $self = shift; | |
if (@_) { $self->{REQ} = shift } | |
return $self->{REQ}; | |
} | |
sub response | |
{ | |
my $self = shift; | |
if (@_) { $self->{RSP} = shift } | |
return $self->{RSP}; | |
} | |
############################################## | |
# Get/Set Database connections | |
############################################## | |
sub myDBI | |
{ | |
my $self = shift; | |
if (@_) { $self->{CONNECTIONS}->{BQ_DBI} = shift } | |
return $self->{CONNECTIONS}->{BQ_DBI}; | |
} | |
sub get_bqdwDBI | |
{ | |
my $self = shift; | |
if (@_) { $self->{CONNECTIONS}->{BQDW_DBI} = shift } | |
return $self->{CONNECTIONS}->{BQDW_DBI}; | |
} | |
sub get_clecDBI | |
{ | |
my $self = shift; | |
if (@_) { $self->{CONNECTIONS}->{CLEC_DBI} = shift } | |
return $self->{CONNECTIONS}->{CLEC_DBI}; | |
} | |
############################################## | |
# Get/Set Configuration Options | |
############################################## | |
sub get_xml_file | |
{ | |
my $self = shift; | |
if(@_) { $self->{CONFIG}->{XML_FILE} = shift } | |
return $self->{CONFIG}->{XML_FILE}; | |
} | |
sub get_xml_ds | |
{ | |
my $self = shift; | |
if(@_) { $self->{CONFIG}->{XML_DS} = shift } | |
return $self->{CONFIG}->{XML_DS}; | |
} | |
sub get_databases_ds | |
{ | |
my $self = shift; | |
if(@_) { $self->{CONFIG}->{DATABASE_DS} = shift } | |
return $self->{CONFIG}->{DATABASE_DS}; | |
} | |
sub get_tables_ds | |
{ | |
my $self = shift; | |
if(@_) { $self->{CONFIG}->{TABLES_DS} = shift } | |
return $self->{CONFIG}->{TABLES_DS}; | |
} | |
sub get_active_databases | |
{ | |
my $self = shift; | |
# return list if it exists otherwise generate | |
if(lc(ref($self->{CONFIG}->{ACTIVE_DATABASES})) eq 'array') { | |
return $self->{CONFIG}->{ACTIVE_DATABASES}; | |
} | |
# connect and set vars | |
my @database_list; | |
# get only active clec databases order from oldest to newest | |
my $sth = $self->myDBI->select("select databasename from clecprofile where status = 'ACTIVE' order by databasename asc"); | |
my $row_count = $sth->numrows; | |
# get the database names | |
for (my $row = 0; $row < $row_count; $row++) { | |
my $data_hash = $sth->fetchrow($row); | |
push(@database_list,$data_hash->{'databasename'}); | |
} | |
# assign to object | |
$self->{CONFIG}->{ACTIVE_DATABASES} = \@database_list; | |
# return the scalar hash reference | |
return $self->{CONFIG}->{ACTIVE_DATABASES}; | |
} | |
############################################## | |
# Parse the data warehouse xml file | |
# Format: | |
# <bqdw> | |
# <databases type='all || exclude || include'><database>QT101</database></databases> | |
# <tables> | |
# <table name='ldusage'> | |
# <col type='int'>id</col> | |
# <col type='varchar(255)'>location</col> | |
# </table> | |
# ... | |
# </tables> | |
# </bqdw> | |
# | |
# Note: to use another source besides xml, just assign array references to get_tables_ds and get_databases_ds | |
# | |
############################################## | |
sub parse_xml | |
{ | |
my $self = shift; | |
my $schema = {}; | |
if($self->get_tables_ds && $self->get_databases_ds) { return 1; } # already defined somewhere else | |
# assign the xmldoc if passed here | |
if(@_) { $self->get_xml_file(shift) } | |
# source the xml | |
my $source_xml = XMLin($self->get_xml_file); | |
my $active_databases = $self->get_active_databases; | |
# create database run list from xml source document | |
# list type can be all/include/exclude | |
my $db_define_type = $source_xml->{databases}->{type}; | |
if(lc($db_define_type) eq 'include') { | |
# check against database_names | |
# select only databases that are in the xml database list and the active clec database list | |
# TODO: internal error on xml databases not in active list | |
$schema->{databases} = $self->compare_arrays($active_databases,$source_xml->{databases}->{database},'intersect'); | |
} elsif(lc($db_define_type) eq 'exclude') { | |
# check against database_names | |
# prune elements from database_list | |
$schema->{databases} = $self->compare_arrays($active_databases,$source_xml->{databases}->{database},'union'); | |
} else { | |
# run all | |
$schema->{databases} = $active_databases; | |
} | |
# parse the tables from the xml source document | |
my @table_names = keys %{$source_xml->{tables}->{table}}; | |
foreach my $table(@table_names) { | |
foreach my $col (values @{$source_xml->{tables}->{table}->{$table}->{col}}) { | |
$schema->{tables}->{$table}->{$col->{content}} = $col->{type}; | |
} | |
} | |
# set build configuration | |
$self->get_tables_ds($schema->{tables}); | |
$self->get_databases_ds($schema->{databases}); | |
return 1; | |
} | |
return 1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment