Skip to content

Instantly share code, notes, and snippets.

@oneillo
Last active October 14, 2019 20:48
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oneillo/18c53a03caec5cfa92a4 to your computer and use it in GitHub Desktop.
Save oneillo/18c53a03caec5cfa92a4 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Request;
use JSON::XS;
use POSIX qw(ceil strftime);
use Time::HiRes qw(usleep);
use Time::Local;
use DateTime::Format::Strptime;
# GLOBAL VARIABLES
# I KNOW THIS IS BAD...SO SHOOT ME
my $startDate; # Format should be '2015-07-01';
my $endDate; # Format should be '2015-08-01';
my $timestamp = `date +"%Y%m%d%H%M"`; # Timestamp captures when the script is run and is used to name the output files that it creates
chomp($timestamp);
#
# parse_args READS IN OPTIONS THAT ARE PASSED TO THE SCRIPT WHEN IT IS RUN, I.E. get_shopify_data.pl -s 2015-07-01 -e 2015-07-31
# THE ONLY OPTIONS RIGHT NOW ARE -H FOR HELP AND -S AND -E FOR THE START AND END DATES TO GET DATA FOR
sub parse_args()
{
my $arg;
my @argv = @ARGV; #THIS GETS THE ARRAY OF OPTIONS PASSED WHEN THE SCRIPT WAS RUN
# CALL THE HELP FUNCTION IF NO OPTIONS WERE PASSED WHEN IT WAS RUN
if($#argv == "-1")
{
help();
}
# GO THROUGH THE OPTIONS AND PULL OUT THE START AND END DATE
while(@argv)
{
$arg = shift(@argv);
if($arg eq "-s" || $arg eq "-S")
{
if($#argv == "-1")
{
die "Error: Missing start date after -s option\n";
}
$startDate = shift(@argv);
}
elsif($arg eq "-e" || $arg eq "-E")
{
if($#argv == "-1")
{
die "Error: Missing end date after -e option\n";
}
$endDate = shift(@argv);
}
elsif($arg eq "-h" || $arg eq "-H")
{
help();
}
else
{
die "Illegal option: $arg.\n";
}
}
}
#
# HELP FUNCTION THAT OUTPUTS THE OPTIONS THAT NEED TO BE PASSED TO THE SCRIPT WHEN IT'S RUN
sub help
{
print "Usage:\n",
" get_shopify_data.pl <OPTIONS>\n",
" Required:\n",
" -s START_DATE : The first day in the period you want to pull data for. In YYYY-MM-DD format, e.g. 2015-07-01 \n",
" -e END_DATE : The last day in the period you want to pull data for. In YYYY-MM-DD format, e.g. 2015-07-31 \n",
" -h : See the help info \n";
die "\n";
}
#
# THE MAIN FUNCTION THAT LOGS INTO THE SHOPIFY ACCOUNT, PULLS THE DATA, AND OUTPUTES METRICS TO LOG FILES IN THE SAME FOLDER WHERE THE SCRIPT IS LOCATED
sub get_store_data
{
#
# CREATE THE LOG FILE NAMES WITH THE RANGE OF DAYS WE ARE GETTING DATA FOR
my $parser = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d');
my $origEnd = $endDate;
my $origStartDate = $parser->parse_datetime($startDate);
my $origEndDate = $parser->parse_datetime($endDate);
my $dateRange = $origStartDate->strftime('%m%d%Y') . "-" . $origEndDate->strftime('%m%d%Y');
my $logfile = "Shopify_raw_data_$dateRange\_$timestamp.csv"; # CSV file where the raw data is stored when the script finishes; each row corresponds to an order
my $periodsummaryfile = "Shopify_summary_$dateRange\_$timestamp.csv"; # CSV file where the summary metrics are stored when the script finishes
#
# ADD 1 DAY TO THE END TIME TO GET BACK ALL OF THE DATA FOR THAT DAY TOO
# OTHERWISE, IF YOU PASS AN END DATE OF 2015-07-31, IT WILL ONLY INCLUDE ORDERS UP TO THE DAY BEFORE, 2015-07-30
$origEndDate->add(days => 1);
$endDate = $origEndDate->strftime('%Y-%m-%d');
# CREATE A TIMESTAMPED FILENAME FOR WRITING THE RAW DATA
# AND THEN OPEN THE FILE TO WRITE DATA TO IT
open(my $log, '>', $logfile) or die "Could not open file '$logfile' $!";
open(my $summaryFile, '>', $periodsummaryfile) or die "Could not open file '$periodsummaryfile' $!";
#
# PRINT THE HEADERS FOR THE COLUMNS OF DATA TO THE OUTPUT FILES
print $log "Store_name,Order_name,Created_at,Email,Name,Subtotal_price,Total_discounts,Total_tax,Source_name,Shipping_costs,Gift_Cards_Used,Store_Credit_Used,Order_Type\n";
print $summaryFile "Period_Start,Period_End,Store_Name,Channel,Gross_Revenue,Discounts,Gift_Cards_Used,Store_Credit_Used,Gross_Sales_Tax,Orders,Unique_Customers,Orders_with_no_email\n";
#
# ARRAY OF SHOPIFY STORE LOGIN INFORMATION
# INCLUDE THE CREDENTIALS FOR ALL OF THE SHOPIFY STORES THAT YOU WANT TO PULL DATA FROM
# STORE NAME (WHATEVER YOU WANT TO CALL EACH ONE), THE STORE'S URL, THE SHOPIFY API KEY, AND THE SHOPIFY API PASSWORD
#
my @shopifyStores =
(
{
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE FIRST SHOPIFY STORE/ACCOUNT
url => 'INSERT_STORE_URL_HERE',
apiKey => 'INSERT_STORE_API_KEY_HERE',
apiPass => 'INSERT_STORE_API_PASSWORD_HERE'
},
{
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE SECOND SHOPIFY STORE/ACCOUNT
url => 'INSERT_STORE_URL_HERE',
apiKey => 'INSERT_STORE_API_KEY_HERE',
apiPass => 'INSERT_STORE_API_PASSWORD_HERE'
},
# COPY AND ADD ANOTHER BLOCK OF THESE FOR EACH SHOPIFY STORE YOU ARE PULLING DATA FROM
{
name => 'INSERT_STORE_NAME_HERE', #INFO FOR THE THIRD SHOPIFY STORE/ACCOUNT
url => 'INSERT_STORE_URL_HERE',
apiKey => 'INSERT_STORE_API_KEY_HERE',
apiPass => 'INSERT_STORE_API_PASSWORD_HERE'
},
);
#
# NOW GO THROUGH EACH STORE DEFINED IN THE @shopifyStores ARRAY ABOVE
foreach my $shopifyStore (@shopifyStores)
{
my $storeUrl = $shopifyStore->{'url'};
my $apiKey = $shopifyStore->{'apiKey'};
my $apiPass = $shopifyStore->{'apiPass'};
#
# VARIABLES TO HOLD STORE DATA
my $periodOrderCount = 0; # number of orders during the defined period
my $pagesOfOrders = 0; # number of pages of orders. Shopify only allows you to get up to 250 orders at a time, so if you have more than that, you need to use pagination to get them all
my @orders = (); # an array to hold all of the order information
my @threePCustomers = (); # array to hold the emails of customers that placed an order through third-party channels that have integrated with our store
my @retailCustomers = (); # array to hold the emails of customers that placed an order through one of our retail stores
my @webCustomers = (); # array to hold the emails of the customers that placed an order through our online store
my $roninWebOrders = 0; # these variables end up holding the number of orders in each channel that don't have an email associated with them
my $ronin3POrders = 0;
my $roninRetailOrders = 0;
my $periodWebSubtotal = 0; # metrics that are collected for the online channel for the Shopify store
my $periodWebGrossSalesTax = 0;
my $periodWebDiscounts = 0;
my $periodWebGiftCardsUsed = 0;
my $periodWebShipping = 0;
my $periodWebOrders = 0;
my $periodWebStoreCreditUsed = 0;
my $periodRetailSubtotal = 0; # metrics that are collected for the POS channel for the Shopify store
my $periodRetailGrossSalesTax = 0;
my $periodRetailDiscounts = 0;
my $periodRetailGiftCardsUsed = 0;
my $periodRetailShipping = 0;
my $periodRetailOrders = 0;
my $periodRetailStoreCreditUsed = 0;
my $period3pSubtotal = 0; # metrics that are collected for the 3rd-party channels connected to the Shopify store
my $period3pGrossSalesTax = 0;
my $period3pDiscounts = 0;
my $period3pGiftCardsUsed = 0;
my $period3pShipping = 0;
my $period3pOrders = 0;
my $period3pStoreCreditUsed = 0;
# http://user:password@www.domain.com/script.pl
# THE SHOPIFY URL THAT IS CALLED TO GET BACK THE # OF ORDERS DURING THE PERIOD OF TIME
my $URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders/count.json?created_at_min=$startDate 00:00&created_at_max=$endDate 00:00";
my $ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 1 });
my $header = HTTP::Request->new(GET => $URL);
my $request = HTTP::Request->new('GET', $URL, $header);
# GET THE JSON DATA PASSED BACK AND ASSIGN THE # OF ORDERS TO THE PERIODORDERCOUNT VARIABLE
my $response = $ua->request($request);
my $arrayref = decode_json($response->content);
$periodOrderCount = $arrayref->{count};
# CALCULATE THE TOTAL # OF PAGES OF ORDERS = # OF ORDERS / 250 ORDERS
$pagesOfOrders = ceil( $arrayref->{count} / 250);
#
# NOW PULL THE INFORMATION FOR ALL OF THE ORDERS PLACED DURING THE PERIOD
for(my $i = 1; $i <= $pagesOfOrders; $i++)
{
# CREATE THE URL THAT WE'LL SEND TO SHOPIFY TO GET BACK DATA ABOUT ALL OF THE ORDERS
$URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders.json?page=$i&limit=250&created_at_min=$startDate 00:00&created_at_max=$endDate 00:00&fields=created_at, id, name, email, subtotal_price, total_discounts, total_tax, shipping_lines, source_name,customer,discount_codes";
$request = HTTP::Request->new('GET', $URL, $header);
$response = $ua->request($request);
my $ordersObj = decode_json($response->content);
# ADD EACH PAGE OF ORDERS TO THE ORDERS ARRAY UNTIL WE HAVE ALL OF THEM FOR THIS SHOPIFY STORE FOR THE PERIOD
push(@orders, @{ $ordersObj->{'orders'} });
}
#
# LOOP THROUGH EACH ORDER AND PULL SUMMARY STATS
my $orderType = "";
foreach my $item ( @orders )
{
#
# SOME VARIABLES TO HOLD ORDER INFO THAT I PULL FROM HASHES
my $orderShipping = 0;
my $orderGiftCardsUsed = 0;
my $orderStoreCreditUsed = 0;
#
# DETERMINE WHERE TO COUNT ORDER BASED ON SOURCE NAME: RETAIL, WEB, 3RD-PARTY
# CHECK THIS TO MAKE SURE YOU ARE DEFINING RETAIL VS WEB VS 3RD-PARTY CHANNELS THE SAME WAY
if($item->{"source_name"} eq "pos" || $item->{"source_name"} eq "iphone" || $item->{"source_name"} eq "android" || $item->{"source_name"} eq "shopify_draft_order")
{
$orderType = "retail";
}
elsif($item->{"source_name"} eq "ENTER_NAME_OF_ANY_3RD_PARTY_CHANNELS_HERE")
{
$orderType = "3p";
}
else
{
$orderType = "web";
}
#
# GET THE SHIPPING COST FOR THE ORDER
my @shipping_lines = @{ $item->{'shipping_lines'} };
if(scalar @shipping_lines > 0)
{
foreach my $f ( @shipping_lines )
{
$orderShipping += $f->{"price"};
}
}
#
# SLEEP STATEMENT HERE IS TO AVOID TRIPPING SHOPIFY'S 2 API REQUESTS PER SECOND LIMIT
usleep(60000);
#
# NOW GET TRANSACTION INFORMATION FOR EVERY ORDER
# NEED THIS IN ORDER TO PULL OUT IF THE CUSTOMER PAID FOR THE ORDER VIA STORE CREDIT OR GIFT CARDS, WHICH YOU DON'T WANT TO DOUBLE COUNT AS REVENUE
$URL = "https://$apiKey:$apiPass\@$storeUrl/admin/orders/$item->{'id'}/transactions.json?fields=amount,gateway, source_name, kind, status";
$request = HTTP::Request->new('GET', $URL, $header);
$response = $ua->request($request);
#if($response->is_error)
#{
# print "Error:$URL\n";
# print $response->error_as_HTML;
#}
my $transactionsObj = decode_json($response->content);
#
# TRACK GIFT CARD AND STORE CREDIT $ USED TO PAY FOR ORDERS
# THIS SHOULD BE ACCOUNTED FOR DIFFERENTLY VIA REVENUE RECOGNITION GUIDELINES
my @transactions = @{ $transactionsObj->{'transactions'} };
foreach my $trans (@transactions)
{
if($trans->{'kind'} eq "sale" && $trans->{'gateway'} eq "gift_card")
{
$orderGiftCardsUsed += $trans->{'amount'};
}
elsif($trans->{'kind'} eq "refund" && $trans->{'gateway'} eq "gift_card")
{
$orderGiftCardsUsed -= $trans->{'amount'};
}
elsif($trans->{'kind'} eq "sale" && $trans->{'gateway'} eq "store-credit")
{
$orderStoreCreditUsed += $trans->{'amount'};
}
elsif($trans->{'kind'} eq "refund" && $trans->{'gateway'} eq "store-credit")
{
$orderStoreCreditUsed -= $trans->{'amount'};
}
}
#
# GET CUSTOMER INFORMATION FOR THE ORDER
# PULLING OUT THE CUSTOMER'S EMAIL ADDRESS OR NAME IF IT EXISTS
# THE ORDER INFO ALSO CONTAINS A CUSTOMER EMAIL, BUT I'VE FOUND THAT SOMETIMES THAT ONE IS BLANK, WHEREAS THE CUSTOMER INFO ISN'T
my $customer = $item->{'customer'};
my $customerEmail = "";
my $customerName = "";
if(defined $customer->{'email'})
{
$customerEmail = $customer->{'email'};
}
if(defined $customer->{'first_name'} && defined $customer->{'last_name'})
{
# REMOVE ANY COMMAS IN THE NAME SO WE DON'T RUN INTO PROBLEMS WITH THE CSV FILES LATER
my $custFirstName = $customer->{'first_name'};
$custFirstName =~ s/,/ /g;
my $custLastName = $customer->{'last_name'};
$custLastName =~ s/,/ /g;
$customerName = "$custFirstName $custLastName";
}
#
# ADD THE VALUES TO THE RIGHT PLACES BASED ON THE ORDER TYPE: RETAIL, WEB, OR 3RD-PARTY
if($orderType eq "retail")
{
$periodRetailOrders++;
$periodRetailSubtotal += $item->{"subtotal_price"};
$periodRetailDiscounts += $item->{"total_discounts"};
$periodRetailGrossSalesTax += $item->{"total_tax"};
$periodRetailShipping += $orderShipping;
$periodRetailGiftCardsUsed += $orderGiftCardsUsed;
$periodRetailStoreCreditUsed += $orderStoreCreditUsed;
#
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT HOW MANY UNIQUE CUSTOMERS WE HAD LATER
# THIS IS RELYING ON THE EMAIL ADDRESS AS THE UNIQUE IDENTIFIER
# THAT'S NOT GREAT BECAUSE IF A CUSTOMER USES 2 DIFFERENT EMAIL ADDRESS, THEN THEY'LL BE COUNTED AS 2 UNIQUE CUSTOMERS
if($customerEmail ne "")
{
push(@retailCustomers, lc $customerEmail);
}
else
{
$roninRetailOrders +=1;
}
}
elsif($orderType eq "3p")
{
$period3pOrders++;
$period3pSubtotal += $item->{"subtotal_price"};
$period3pDiscounts += $item->{"total_discounts"};
$period3pGrossSalesTax += $item->{"total_tax"};
$period3pShipping += $orderShipping;
$period3pGiftCardsUsed += $orderGiftCardsUsed;
$period3pStoreCreditUsed += $orderStoreCreditUsed;
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT UNIQUES LATER
if($customerEmail ne "")
{
push(@threePCustomers, lc $customerEmail);
}
else
{
$ronin3POrders +=1;
}
}
else
{
$periodWebOrders++;
$periodWebSubtotal += $item->{"subtotal_price"};
$periodWebDiscounts += $item->{"total_discounts"};
$periodWebGrossSalesTax += $item->{"total_tax"};
$periodWebShipping += $orderShipping;
$periodWebGiftCardsUsed += $orderGiftCardsUsed;
$periodWebStoreCreditUsed += $orderStoreCreditUsed;
# ADD THE CUSTOMER TO THE ARRAY OF CUSTOMERS TO COUNT UNIQUES LATER
if($customerEmail ne "")
{
push(@webCustomers, lc $customerEmail);
}
else
{
$roninWebOrders +=1;
}
}
#
# PRINT THE DATA FOR EACH INDIVIDUAL ORDER TO THE LOG FILE
print $log "$shopifyStore->{'name'},$item->{'name'},$item->{'created_at'},$customerEmail,$customerName,$item->{'subtotal_price'},$item->{'total_discounts'},$item->{'total_tax'},$item->{'source_name'},$orderShipping,$orderGiftCardsUsed,$orderStoreCreditUsed,$orderType\n";
#
# ANOTHER SLEEP STATEMENT TO AVOID TRIGGERING SHOPIFY'S 2 API CALLS PER SECOND LIMIT
usleep(180000);
}
# DONE LOOPING THROUGH ALL OF THE ORDERS FOR THE STORE
#
# COUNT THE UNIQUE # OF CUSTOMERS FOR EACH CHANNEL
my $retailCustomers = 0;
my $webCustomers = 0;
my $threePCustomers = 0;
my @tempCustArray = ();
if($#webCustomers > -1)
{
@tempCustArray = keys { map { $_ => 1 } @webCustomers };
$webCustomers = $#tempCustArray + 1;
}
if($#retailCustomers > -1)
{
@tempCustArray = keys { map { $_ => 1 } @retailCustomers };
$retailCustomers = $#tempCustArray + 1;
}
if($#threePCustomers > -1)
{
@tempCustArray = keys { map { $_ => 1 } @threePCustomers };
$threePCustomers = $#tempCustArray + 1;
}
#
# OUTPUT THE STORE DATA TO THE LOG FILES
# THIS CALCULATES THE GROSS REVENUE = SUBTOTAL + SHIPPING + DISCOUNTS
my $webGrossRev = $periodWebSubtotal + $periodWebShipping + $periodWebDiscounts;
my $retailGrossRev = $periodRetailSubtotal + $periodRetailShipping + $periodRetailDiscounts;
my $threePGrossRev = $period3pSubtotal + $period3pShipping + $period3pDiscounts;
# OUTPUT THE ONLINE CHANNEL DATA FOR THE STORE
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},web,$webGrossRev,$periodWebDiscounts,$periodWebGiftCardsUsed,$periodWebStoreCreditUsed,$periodWebGrossSalesTax,$periodWebOrders,$webCustomers,$roninWebOrders\n";
# OUTPUT THE OFFLINE/POS CHANNEL DATA FOR THE STORE
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},retail,$retailGrossRev,$periodRetailDiscounts,$periodRetailGiftCardsUsed,$periodRetailStoreCreditUsed,$periodRetailGrossSalesTax,$periodRetailOrders,$retailCustomers,$roninRetailOrders\n";
# OUTPUT THE 3RD-PARTY CHANNEL DATA FOR THE STORE
print $summaryFile "$startDate,$origEnd,$shopifyStore->{'name'},3rd-party (Spring),$threePGrossRev,$period3pDiscounts,$period3pGiftCardsUsed,$period3pStoreCreditUsed,$period3pGrossSalesTax,$period3pOrders,$threePCustomers,$ronin3POrders\n";
}
#
# CLOSE THE LOG FILES THAT THE SCRIPT WAS WRITING TO
close $log;
close $summaryFile;
#if ($response->is_success)
#{
# print "URL:$URL\nHeaders:\n";
# print $response->headers_as_string;
#}
#elsif ($response->is_error)
#{
# print "Error:$URL\n";
# print $response->error_as_HTML;
#}
#}
}
#################################################
#
# RUN THE FUNCTIONS DEFINED ABOVE
#
parse_args();
get_store_data();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment