Skip to content

Instantly share code, notes, and snippets.

@robhammond
Created February 20, 2018 17:29
Show Gist options
  • Save robhammond/0dcc27214acd5df273a64aa2055b1f5a to your computer and use it in GitHub Desktop.
Save robhammond/0dcc27214acd5df273a64aa2055b1f5a to your computer and use it in GitHub Desktop.
Extract monthly data from Adobe Analytics to a Google BigQuery table. Once completed, use `cat *.json >> all.json` to combine files for import into BQ
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use DateTime;
use FindBin qw($Bin);
use Getopt::Long;
use Modern::Perl;
use Mojo::JSON qw(encode_json);
use lib ("$Bin/../lib");
use HiveAlpha::Common qw(parse_config);
use TMR::SiteCatalyst;
my $bin_dir = "$Bin/../bin";
my $config = parse_config("$Bin/../news_check.conf");
my ($first_date, $last_date);
my $date_granularity = 'month';
GetOptions (
"from=s" => \$first_date,
"to=s" => \$last_date,
"type=s" => \$date_granularity,
);
my $sites = {
mirror => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Mirror"
},
mirrormobile => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Mirror"
},
m4 => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Mirror"
},
ampp3d => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Mirror"
},
'manchester evening news' => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Manchester Evening News'
},
'manchester evening newsmobile' => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Manchester Evening News'
},
liverpoolecho => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Liverpool Echo'
},
liverpoolechomobile => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Liverpool Echo'
},
liverpoolechmobile => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Liverpool Echo'
},
dailyrecord => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'Scotland',
name => 'Daily Record'
},
dailyrecordmobile => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'Scotland',
name => 'Daily Record'
},
dailyrecord3 => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'Scotland',
name => 'Daily Record'
},
icwales2 => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Wales Online'
},
icwales2mobile => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Wales Online'
},
icWales => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Wales Online'
},
walesonline => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Wales Online'
},
walesonlinemobile => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Wales Online'
},
birminghammail => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Birmingham Mail'
},
irishmirror => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Irish Mirror"
},
irishmirrormobile => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Irish Mirror"
},
nechronicle => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Chronicle Live'
},
nechroniclemobile => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Chronicle Live'
},
hulldailymail => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Hull Daily Mail'
},
lwhulldaily => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'LW Hull Daily Mail'
},
'get surrey' => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Get Surrey'
},
bristolpost => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Bristol Post'
},
croydonadvertiser => {
siteGroup => 'Regionals',
siteSubGroup => 'Community Sites',
name => "Croydon Advertiser"
},
gazettelive2 => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Gazette Live'
},
glasgowlive => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'Scotland',
name => 'Glasgow Live'
},
devonlive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Devon Live'
},
lwdevonlive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'LW Devon Live'
},
belfastlive => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Belfast Live"
},
coventrytelegraph => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Coventry Telegraph'
},
dailypost => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Daily Post'
},
nottinghampost => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Nottingham Post'
},
leicestermercury => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Leicester Mercury'
},
plymouthherald => {
siteGroup => 'Regionals',
siteSubGroup => 'Regional Capital',
name => 'Plymouth Herald'
},
derbytelegraph => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Derby Telegraph'
},
dublinlive => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => "Dublin Live"
},
kentlive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Kent Live'
},
gloucestershirelive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Gloucestershire Live'
},
stokesentinel => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Stoke Sentinel'
},
cornwalllive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Cornwall Live'
},
huddexaminer => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Huddersfield Examiner'
},
somersetlive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Somerset Live'
},
essexlive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Essex Live'
},
'get west london' => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Get West London'
},
cambridgenews => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Cambridge News'
},
'get reading' => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Get Reading'
},
footballlondon => {
siteGroup => 'Regionals',
siteSubGroup => 'Other',
name => "Football London"
},
grimsbytelegraph => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Grimsby Telegraph'
},
chesterchronicle => {
siteGroup => 'Regionals',
siteSubGroup => 'Other',
name => "Chester Chronicle"
},
lincolnshirelive => {
siteGroup => 'Regionals',
siteSubGroup => 'County Sites',
name => 'Lincolnshire Live'
},
lwherts => {
siteGroup => 'Regionals',
siteSubGroup => 'Community Sites',
name => "Hertfordshire Mercury"
},
bathchronicle => {
siteGroup => 'Regionals',
siteSubGroup => 'Urban Hub',
name => 'Bath Chronicle'
},
rsvplive => {
siteGroup => 'UK Nats and Scotland',
siteSubGroup => 'UK Nats',
name => 'RSVP Live'
},
};
unless ($first_date) {
# $first_date = DateTime->now->subtract(days => 1)->ymd('');
# $last_date = DateTime->now->subtract(days => 1)->ymd('');
$first_date = DateTime->now(time_zone => 'Europe/London')->subtract(days => 1)->ymd();
$last_date = DateTime->now(time_zone => 'Europe/London')->subtract(days => 1)->ymd();
}
my $from = DateTime->new(
{
year => substr($first_date,0,4),
month => substr($first_date,5,2),
day => substr($first_date,8,2),
time_zone => 'Europe/London'
});
my $to = DateTime->new(
{
year => substr($last_date,0,4),
month => substr($last_date,5,2),
day => substr($last_date,8,2),
time_zone => 'Europe/London'
});
while ( $from <= $to ) {
say "running " . $from->ymd;
open my $fh, ">", "monthly-bq-" . $from->ymd . ".json" or die $!;
my $segments = [];
my $site_catalyst = TMR::SiteCatalyst->new(
username => $config->{'site_catalyst'}->{'creds'}->{'username'},
shared_secret => $config->{'site_catalyst'}->{'creds'}->{'shared_secret'},
api_subdomain => 'api3', # optional, api3 = london
);
my $report = $site_catalyst->instantiate_report(
report_suite_id => $config->{'site_catalyst'}->{'creds'}->{'report_suite_id'},
);
$report->queue(
dateFrom => $from->ymd,
dateTo => $to->ymd,
metrics => [
{ id => 'cm988_583593eb02f60c3f0654fbf0' }, # kpi: unique browsers
{ id => 'cm988_56b0dff3dd0ab12d4cbdac0c' }, # kpi: pageviews
{ id => 'cm988_585d18295fb23e5abcd8f6e6' }, # calc metric seo uv
{ id => 'cm988_5915579d97301a6927e0468c' }, # calc metric seo pv
{ id => 'cm988_585d187ea90b46318e4b79f4' }, # calc metric social uv
{ id => 'cm988_5915593c5fb23e52577c13de' }, # calc metric social pv
{ id => 'cm988_585d18b397301a0b131d73cc' }, # calc metric referral uv
{ id => 'cm988_5915597f1d8453766a6ff62d' }, # calc metric referral pv
{ id => 'cm988_585d18bb5fb23e5abcd8f6e7' }, # calc metric brand uv
{ id => 'cm988_591557fc01c1eb182edc379e' }, # calc metric brand pv
{ id => 'cm988_585d18bb1d845314b2d7f775' }, # calc metric dark uv
{ id => 'cm988_59155a191d8453766a6ff62e' }, # calc metric dark pv
{ id => 'cm988_589345930d38c2652a046d23' }, # calc metric desktop uv
{ id => 'cm988_5893453d366e5b10113cf399' }, # calc metric mobile uv
{ id => 'cm988_5893456397301a3142ffef81' }, # calc metric amp uv
{ id => 'cm988_589344e30d38c2652a046d22' }, # calc metric fbia uv
],
elements => [
{ id => 'prop5', top => 200 }, # owner site
],
segments => $segments,
dateGranularity => $date_granularity,
);
my $report_id = $report->get_id();
my $results;
for ( my $tries = 0; $tries < 1000; $tries++ ) {
if ($results = $report->retrieve( { report_id => $report_id } )) {
# $results = $report->retrieve();
last;
} else {
say "sleeping ($tries)...";
sleep 3;
}
}
my @json;
for my $r (@{$results->{'data'}->[0]->{'breakdown'}}) {
# next unless $sites->{$r->{'name'}};
my $siteName = $sites->{$r->{'name'}}->{'name'} || $r->{'name'};
push @json, {
siteName => $siteName,
eVar37 => $r->{'name'},
date => $from->ymd,
totals => {
visitors => $r->{'counts'}->[0] +0,
pageviews => $r->{'counts'}->[1] +0
},
trafficSource => {
seo => {
visitors => $r->{'counts'}->[2] +0,
pageviews => $r->{'counts'}->[3] +0
},
social => {
visitors => $r->{'counts'}->[4] +0,
pageviews => $r->{'counts'}->[5] +0
},
referral => {
visitors => $r->{'counts'}->[6] +0,
pageviews => $r->{'counts'}->[7] +0
},
brand => {
visitors => $r->{'counts'}->[8] +0,
pageviews => $r->{'counts'}->[9] +0
},
dark => {
visitors => $r->{'counts'}->[10] +0,
pageviews => $r->{'counts'}->[11] +0
},
},
platform => {
desktop => {
# pageviews => 0,
visitors => $r->{'counts'}->[12] +0
},
mobile => {
# pageviews => 0,
visitors => $r->{'counts'}->[13] +0
},
amp => {
# pageviews => 0,
visitors => $r->{'counts'}->[14] +0
},
fbia => {
# pageviews => 0,
visitors => $r->{'counts'}->[15] +0
},
}
};
}
for my $j (@json) {
say encode_json $j;
say $fh encode_json($j);
}
$from->add(months => 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment