Last active
October 14, 2019 20:48
-
-
Save oneillo/18c53a03caec5cfa92a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use LWP::UserAgent; | |
use HTTP::Request; | |
use JSON::XS; | |
use POSIX qw(ceil strftime); | |
use Time::HiRes qw(usleep); | |
use Time::Local; | |
use DateTime::Format::Strptime; | |
# GLOBAL VARIABLES | |
# I KNOW THIS IS BAD...SO SHOOT ME | |
my $startDate; # Format should be '2015-07-01'; | |
my $endDate; # Format should be '2015-08-01'; | |
my $timestamp = `date +"%Y%m%d%H%M"`; # Timestamp captures when the script is run and is used to name the output files that it creates | |
chomp($timestamp); | |
# | |
# parse_args READS IN OPTIONS THAT ARE PASSED TO THE SCRIPT WHEN IT IS RUN, I.E. get_shopify_data.pl -s 2015-07-01 -e 2015-07-31 | |
# THE ONLY OPTIONS RIGHT NOW ARE -H FOR HELP AND -S AND -E FOR THE START AND END DATES TO GET DATA FOR | |
sub parse_args() | |
{ | |
my $arg; | |
my @argv = @ARGV; #THIS GETS THE ARRAY OF OPTIONS PASSED WHEN THE SCRIPT WAS RUN | |
# CALL THE HELP FUNCTION IF NO OPTIONS WERE PASSED WHEN IT WAS RUN | |
if($#argv == "-1") | |
{ | |
help(); | |
} | |
# GO THROUGH THE OPTIONS AND PULL OUT THE START AND END DATE | |
while(@argv) | |
{ | |
$arg = shift(@argv); | |
if($arg eq "-s" || $arg eq "-S") | |
{ | |
if($#argv == "-1") | |
{ | |
die "Error: Missing start date after -s option\n"; | |
} | |
$startDate = shift(@argv); | |
} | |
elsif($arg eq "-e" || $arg eq "-E") | |
{ | |
if($#argv == "-1") | |
{ | |
die "Error: Missing end date after -e option\n"; | |
} | |
$endDate = shift(@argv); | |
} | |
elsif($arg eq "-h" || $arg eq "-H") | |
{ | |
help(); | |
} | |
else | |
{ | |
die "Illegal option: $arg.\n"; | |
} | |
} | |
} | |
# | |
# HELP FUNCTION THAT OUTPUTS THE OPTIONS THAT NEED TO BE PASSED TO THE SCRIPT WHEN IT'S RUN | |
sub help | |
{ | |
print "Usage:\n", | |
" get_shopify_data.pl <OPTIONS>\n", | |
" Required:\n", | |
" -s START_DATE : The first day in the period you want to pull data for. In YYYY-MM-DD format, e.g. 2015-07-01 \n", | |
" -e END_DATE : The last day in the period you want to pull data for. In YYYY-MM-DD format, e.g. 2015-07-31 \n", | |
" -h : See the help info \n"; | |
die "\n"; | |
} | |
# | |
# THE MAIN FUNCTION THAT LOGS INTO THE SHOPIFY ACCOUNT, PULLS THE DATA, AND OUTPUTES METRICS TO LOG FILES IN THE SAME FOLDER WHERE THE SCRIPT IS LOCATED | |
sub get_store_data | |
{ | |
# | |
# CREATE THE LOG FILE NAMES WITH THE RANGE OF DAYS WE ARE GETTING DATA FOR | |
my $parser = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d'); | |
my $origEnd = $endDate; | |
my $origStartDate = $parser->parse_datetime($startDate); | |
my $origEndDate = $parser->parse_datetime($endDate); | |
my $dateRange = $origStartDate->strftime('%m%d%Y') . "-" . $origEndDate->strftime('%m%d%Y'); | |
my $logfile = "Shopify_raw_data_$dateRange\_$timestamp.csv"; # CSV file where the raw data is stored when the script finishes; each row corresponds to an order | |
my $periodsummaryfile = "Shopify_summary_$dateRange\_$timestamp.csv"; # CSV file where the summary metrics are stored when the script finishes | |
# | |
# ADD 1 DAY TO THE END TIME TO GET BACK ALL OF THE DATA FOR THAT DAY TOO | |
# OTHERWISE, IF YOU PASS AN END DATE OF 2015-07-31, IT WILL ONLY INCLUDE ORDERS UP TO THE DAY BEFORE, 2015-07-30 | |
$origEndDate->add(days => 1); | |
$endDate = $origEndDate->strftime('%Y-%m-%d'); | |
# CREATE A TIMESTAMPED FILENAME FOR WRITING THE RAW DATA | |
# AND THEN OPEN THE FILE TO WRITE DATA TO IT | |
open(my $log, '>', $logfile) or die "Could not open file '$logfile' $!"; | |
open(my $summaryFile, '>', $periodsummaryfile) or die "Could not open file '$periodsummaryfile' $!"; | |
# | |
# PRINT THE HEADERS FOR THE COLUMNS OF DATA TO THE OUTPUT FILES | |
print $log "Store_name,Order_name,Created_at,Email,Name,Subtotal_price,Total_discounts,Total_tax,Source_name,Shipping_costs,Gift_Cards_Used,Store_Credit_Used,Order_Type\n"; | |
print $summaryFile "Period_Start,Period_End,Store_Name,Channel,Gross_Revenue,Discounts,Gift_Cards_Used,Store_Credit_Used,Gross_Sales_Tax,Orders,Unique_Customers,Orders_with_no_email\n"; | |
# | |
# ARRAY OF SHOPIFY STORE LOGIN INFORMATION | |
# INCLUDE THE CREDENTIALS FOR ALL OF THE SHOPIFY STORES THAT YOU WANT TO PULL DATA FROM | |
# STORE NAME (WHATEVER YOU WANT TO CALL EACH ONE), THE STORE'S URL, THE SHOPIFY API KEY, AND THE SHOPIFY API PASSWORD | |
# | |
my @shopifyStores = | |
( | |
{ | |
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE FIRST SHOPIFY STORE/ACCOUNT | |
url => 'INSERT_STORE_URL_HERE', | |
apiKey => 'INSERT_STORE_API_KEY_HERE', | |
apiPass => 'INSERT_STORE_API_PASSWORD_HERE' | |
}, | |
{ | |
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE SECOND SHOPIFY STORE/ACCOUNT | |
url => 'INSERT_STORE_URL_HERE', | |
apiKey => 'INSERT_STORE_API_KEY_HERE', | |
apiPass => 'INSERT_STORE_API_PASSWORD_HERE' | |
}, | |
# COPY AND ADD ANOTHER BLOCK OF THESE FOR EACH SHOPIFY STORE YOU ARE PULLING DATA FROM | |
{ | |
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE THIRD SHOPIFY STORE/ACCOUNT | |
url => 'INSERT_STORE_URL_HERE', | |
apiKey => 'INSERT_STORE_API_KEY_HERE', | |
apiPass => 'INSERT_STORE_API_PASSWORD_HERE' | |
}, | |
); | |
# | |
# NOW GO THROUGH EACH STORE DEFINED IN THE @shopifyStores ARRAY ABOVE | |
foreach my $shopifyStore (@shopifyStores) | |
{ | |
my $storeUrl = $shopifyStore->{'url'}; | |
my $apiKey = $shopifyStore->{'apiKey'}; | |
my $apiPass = $shopifyStore->{'apiPass'}; | |
# | |
# VARIABLES TO HOLD STORE DATA | |
my $periodOrderCount = 0; # number of orders during the defined period | |
my $pagesOfOrders = 0; # number of pages of orders. Shopify only allows you to get up to 250 orders at a time, so if you have more than that, you need to use pagination to get them all | |
my @orders = (); # an array to hold all of the order information | |
my @threePCustomers = (); # array to hold the emails of customers that placed an order through third-party channels that have integrated with our store | |
my @retailCustomers = (); # array to hold the emails of customers that placed an order through one of our retail stores | |
my @webCustomers = (); # array to hold the emails of the customers that placed an order through our online store | |
my $roninWebOrders = 0; # these variables end up holding the number of orders in each channel that don't have an email associated with them | |
my $ronin3POrders = 0; | |
my $roninRetailOrders = 0; | |
my $periodWebSubtotal = 0; # metrics that are collected for the online channel for the Shopify store | |
my $periodWebGrossSalesTax = 0; | |
my $periodWebDiscounts = 0; | |
my $periodWebGiftCardsUsed = 0; | |
my $periodWebShipping = 0; | |
my $periodWebOrders = 0; | |
my $periodWebStoreCreditUsed = 0; | |
my $periodRetailSubtotal = 0; # metrics that are collected for the POS channel for the Shopify store | |
my $periodRetailGrossSalesTax = 0; | |
my $periodRetailDiscounts = 0; | |
my $periodRetailGiftCardsUsed = 0; | |
my $periodRetailShipping = 0; | |
my $periodRetailOrders = 0; | |
my $periodRetailStoreCreditUsed = 0; | |
my $period3pSubtotal = 0; # metrics that are collected for the 3rd-party channels connected to the Shopify store | |
my $period3pGrossSalesTax = 0; | |
my $period3pDiscounts = 0; | |
my $period3pGiftCardsUsed = 0; | |
my $period3pShipping = 0; | |
my $period3pOrders = 0; | |
my $period3pStoreCreditUsed = 0; | |
# http://user:password@www.domain.com/script.pl | |
# THE SHOPIFY URL THAT IS CALLED TO GET BACK THE # OF ORDERS DURING THE PERIOD OF TIME | |
my $URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders/count.json?created_at_min=$startDate 00:00&created_at_max=$endDate 00:00"; | |
my $ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 1 }); | |
my $header = HTTP::Request->new(GET => $URL); | |
my $request = HTTP::Request->new('GET', $URL, $header); | |
# GET THE JSON DATA PASSED BACK AND ASSIGN THE # OF ORDERS TO THE PERIODORDERCOUNT VARIABLE | |
my $response = $ua->request($request); | |
my $arrayref = decode_json($response->content); | |
$periodOrderCount = $arrayref->{count}; | |
# CALCULATE THE TOTAL # OF PAGES OF ORDERS = # OF ORDERS / 250 ORDERS | |
$pagesOfOrders = ceil( $arrayref->{count} / 250); | |
# | |
# NOW PULL THE INFORMATION FOR ALL OF THE ORDERS PLACED DURING THE PERIOD | |
for(my $i = 1; $i <= $pagesOfOrders; $i++) | |
{ | |
# CREATE THE URL THAT WE'LL SEND TO SHOPIFY TO GET BACK DATA ABOUT ALL OF THE ORDERS | |
$URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders.json?page=$i&limit=250&created_at_min=$startDate 00:00&created_at_max=$endDate 00:00&fields=created_at, id, name, email, subtotal_price, total_discounts, total_tax, shipping_lines, source_name,customer,discount_codes"; | |
$request = HTTP::Request->new('GET', $URL, $header); | |
$response = $ua->request($request); | |
my $ordersObj = decode_json($response->content); | |
# ADD EACH PAGE OF ORDERS TO THE ORDERS ARRAY UNTIL WE HAVE ALL OF THEM FOR THIS SHOPIFY STORE FOR THE PERIOD | |
push(@orders, @{ $ordersObj->{'orders'} }); | |
} | |
# | |
# LOOP THROUGH EACH ORDER AND PULL SUMMARY STATS | |
my $orderType = ""; | |
foreach my $item ( @orders ) | |
{ | |
# | |
# SOME VARIABLES TO HOLD ORDER INFO THAT I PULL FROM HASHES | |
my $orderShipping = 0; | |
my $orderGiftCardsUsed = 0; | |
my $orderStoreCreditUsed = 0; | |
# | |
# DETERMINE WHERE TO COUNT ORDER BASED ON SOURCE NAME: RETAIL, WEB, 3RD-PARTY | |
# CHECK THIS TO MAKE SURE YOU ARE DEFINING RETAIL VS WEB VS 3RD-PARTY CHANNELS THE SAME WAY | |
if($item->{"source_name"} eq "pos" || $item->{"source_name"} eq "iphone" || $item->{"source_name"} eq "android" || $item->{"source_name"} eq "shopify_draft_order") | |
{ | |
$orderType = "retail"; | |
} | |
elsif($item->{"source_name"} eq "ENTER_NAME_OF_ANY_3RD_PARTY_CHANNELS_HERE") | |
{ | |
$orderType = "3p"; | |
} | |
else | |
{ | |
$orderType = "web"; | |
} | |
# | |
# GET THE SHIPPING COST FOR THE ORDER | |
my @shipping_lines = @{ $item->{'shipping_lines'} }; | |
if(scalar @shipping_lines > 0) | |
{ | |
foreach my $f ( @shipping_lines ) | |
{ | |
$orderShipping += $f->{"price"}; | |
} | |
} | |
# | |
# SLEEP STATEMENT HERE IS TO AVOID TRIPPING SHOPIFY'S 2 API REQUESTS PER SECOND LIMIT | |
usleep(60000); | |
# | |
# NOW GET TRANSACTION INFORMATION FOR EVERY ORDER | |
# NEED THIS IN ORDER TO PULL OUT IF THE CUSTOMER PAID FOR THE ORDER VIA STORE CREDIT OR GIFT CARDS, WHICH YOU DON'T WANT TO DOUBLE COUNT AS REVENUE | |
$URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders/$item->{'id'}/transactions.json?fields=amount,gateway, source_name, kind, status"; | |
$request = HTTP::Request->new('GET', $URL, $header); | |
$response = $ua->request($request); | |
#if($response->is_error) | |
#{ | |
# print "Error:$URL\n"; | |
# print $response->error_as_HTML; | |
#} | |
my $transactionsObj = decode_json($response->content); | |
# | |
# TRACK GIFT CARD AND STORE CREDIT $ USED TO PAY FOR ORDERS | |
# THIS SHOULD BE ACCOUNTED FOR DIFFERENTLY VIA REVENUE RECOGNITION GUIDELINES | |
my @transactions = @{ $transactionsObj->{'transactions'} }; | |
foreach my $trans (@transactions) | |
{ | |
if($trans->{'kind'} eq "sale" && $trans->{'gateway'} eq "gift_card") | |
{ | |
$orderGiftCardsUsed += $trans->{'amount'}; | |
} | |
elsif($trans->{'kind'} eq "refund" && $trans->{'gateway'} eq "gift_card") | |
{ | |
$orderGiftCardsUsed -= $trans->{'amount'}; | |
} | |
elsif($trans->{'kind'} eq "sale" && $trans->{'gateway'} eq "store-credit") | |
{ | |
$orderStoreCreditUsed += $trans->{'amount'}; | |
} | |
elsif($trans->{'kind'} eq "refund" && $trans->{'gateway'} eq "store-credit") | |
{ | |
$orderStoreCreditUsed -= $trans->{'amount'}; | |
} | |
} | |
# | |
# GET CUSTOMER INFORMATION FOR THE ORDER | |
# PULLING OUT THE CUSTOMER'S EMAIL ADDRESS OR NAME IF IT EXISTS | |
# THE ORDER INFO ALSO CONTAINS A CUSTOMER EMAIL, BUT I'VE FOUND THAT SOMETIMES THAT ONE IS BLANK, WHEREAS THE CUSTOMER INFO ISN'T | |
my $customer = $item->{'customer'}; | |
my $customerEmail = ""; | |
my $customerName = ""; | |
if(defined $customer->{'email'}) | |
{ | |
$customerEmail = $customer->{'email'}; | |
} | |
if(defined $customer->{'first_name'} && defined $customer->{'last_name'}) | |
{ | |
# REMOVE ANY COMMAS IN THE NAME SO WE DON'T RUN INTO PROBLEMS WITH THE CSV FILES LATER | |
my $custFirstName = $customer->{'first_name'}; | |
$custFirstName =~ s/,/ /g; | |
my $custLastName = $customer->{'last_name'}; | |
$custLastName =~ s/,/ /g; | |
$customerName = "$custFirstName $custLastName"; | |
} | |
# | |
# ADD THE VALUES TO THE RIGHT PLACES BASED ON THE ORDER TYPE: RETAIL, WEB, OR 3RD-PARTY | |
if($orderType eq "retail") | |
{ | |
$periodRetailOrders++; | |
$periodRetailSubtotal += $item->{"subtotal_price"}; | |
$periodRetailDiscounts += $item->{"total_discounts"}; | |
$periodRetailGrossSalesTax += $item->{"total_tax"}; | |
$periodRetailShipping += $orderShipping; | |
$periodRetailGiftCardsUsed += $orderGiftCardsUsed; | |
$periodRetailStoreCreditUsed += $orderStoreCreditUsed; | |
# | |
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT HOW MANY UNIQUE CUSTOMERS WE HAD LATER | |
# THIS IS RELYING ON THE EMAIL ADDRESS AS THE UNIQUE IDENTIFIER | |
# THAT'S NOT GREAT BECAUSE IF A CUSTOMER USES 2 DIFFERENT EMAIL ADDRESS, THEN THEY'LL BE COUNTED AS 2 UNIQUE CUSTOMERS | |
if($customerEmail ne "") | |
{ | |
push(@retailCustomers, lc $customerEmail); | |
} | |
else | |
{ | |
$roninRetailOrders +=1; | |
} | |
} | |
elsif($orderType eq "3p") | |
{ | |
$period3pOrders++; | |
$period3pSubtotal += $item->{"subtotal_price"}; | |
$period3pDiscounts += $item->{"total_discounts"}; | |
$period3pGrossSalesTax += $item->{"total_tax"}; | |
$period3pShipping += $orderShipping; | |
$period3pGiftCardsUsed += $orderGiftCardsUsed; | |
$period3pStoreCreditUsed += $orderStoreCreditUsed; | |
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT UNIQUES LATER | |
if($customerEmail ne "") | |
{ | |
push(@threePCustomers, lc $customerEmail); | |
} | |
else | |
{ | |
$ronin3POrders +=1; | |
} | |
} | |
else | |
{ | |
$periodWebOrders++; | |
$periodWebSubtotal += $item->{"subtotal_price"}; | |
$periodWebDiscounts += $item->{"total_discounts"}; | |
$periodWebGrossSalesTax += $item->{"total_tax"}; | |
$periodWebShipping += $orderShipping; | |
$periodWebGiftCardsUsed += $orderGiftCardsUsed; | |
$periodWebStoreCreditUsed += $orderStoreCreditUsed; | |
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT UNIQUES LATER | |
if($customerEmail ne "") | |
{ | |
push(@webCustomers, lc $customerEmail); | |
} | |
else | |
{ | |
$roninWebOrders +=1; | |
} | |
} | |
# | |
# PRINT THE DATA FOR EACH INDIVIDUAL ORDER TO THE LOG FILE | |
print $log "$shopifyStore->{'name'},$item->{'name'},$item->{'created_at'},$customerEmail,$customerName,$item->{'subtotal_price'},$item->{'total_discounts'},$item->{'total_tax'},$item->{'source_name'},$orderShipping,$orderGiftCardsUsed,$orderStoreCreditUsed,$orderType\n"; | |
# | |
# ANOTHER SLEEP STATEMENT TO AVOID TRIGGERING SHOPIFY'S 2 API CALLS PER SECOND LIMIT | |
usleep(180000); | |
} | |
# DONE LOOPING THROUGH ALL OF THE ORDERS FOR THE STORE | |
# | |
# COUNT THE UNIQUE # OF CUSTOMERS FOR EACH CHANNEL | |
my $retailCustomers = 0; | |
my $webCustomers = 0; | |
my $threePCustomers = 0; | |
my @tempCustArray = (); | |
if($#webCustomers > -1) | |
{ | |
@tempCustArray = keys { map { $_ => 1 } @webCustomers }; | |
$webCustomers = $#tempCustArray + 1; | |
} | |
if($#retailCustomers > -1) | |
{ | |
@tempCustArray = keys { map { $_ => 1 } @retailCustomers }; | |
$retailCustomers = $#tempCustArray + 1; | |
} | |
if($#threePCustomers > -1) | |
{ | |
@tempCustArray = keys { map { $_ => 1 } @threePCustomers }; | |
$threePCustomers = $#tempCustArray + 1; | |
} | |
# | |
# OUTPUT THE STORE DATA TO THE LOG FILES | |
# THIS CALCULATES THE GROSS REVENUE = SUBTOTAL + SHIPPING + DISCOUNTS | |
my $webGrossRev = $periodWebSubtotal + $periodWebShipping + $periodWebDiscounts; | |
my $retailGrossRev = $periodRetailSubtotal + $periodRetailShipping + $periodRetailDiscounts; | |
my $threePGrossRev = $period3pSubtotal + $period3pShipping + $period3pDiscounts; | |
# OUTPUT THE ONLINE CHANNEL DATA FOR THE STORE | |
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},web,$webGrossRev,$periodWebDiscounts,$periodWebGiftCardsUsed,$periodWebStoreCreditUsed,$periodWebGrossSalesTax,$periodWebOrders,$webCustomers,$roninWebOrders\n"; | |
# OUTPUT THE OFFLINE/POS CHANNEL DATA FOR THE STORE | |
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},retail,$retailGrossRev,$periodRetailDiscounts,$periodRetailGiftCardsUsed,$periodRetailStoreCreditUsed,$periodRetailGrossSalesTax,$periodRetailOrders,$retailCustomers,$roninRetailOrders\n"; | |
# OUTPUT THE 3RD-PARTY CHANNEL DATA FOR THE STORE | |
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},3rd-party (Spring),$threePGrossRev,$period3pDiscounts,$period3pGiftCardsUsed,$period3pStoreCreditUsed,$period3pGrossSalesTax,$period3pOrders,$threePCustomers,$ronin3POrders\n"; | |
} | |
# | |
# CLOSE THE LOG FILES THAT THE SCRIPT WAS WRITING TO | |
close $log; | |
close $summaryFile; | |
#if ($response->is_success) | |
#{ | |
# print "URL:$URL\nHeaders:\n"; | |
# print $response->headers_as_string; | |
#} | |
#elsif ($response->is_error) | |
#{ | |
# print "Error:$URL\n"; | |
# print $response->error_as_HTML; | |
#} | |
#} | |
} | |
################################################# | |
# | |
# RUN THE FUNCTIONS DEFINED ABOVE | |
# | |
parse_args(); | |
get_store_data(); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment