Created
November 5, 2019 02:30
-
-
Save nattaylor/da176e1b4a107a6f218859af101fa0cd to your computer and use it in GitHub Desktop.
Server-side Google Analytics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Send pageviews found in the Apache access log to Google Analytics
 */
// URL to send logs to (Google Analytics batch endpoint)
const GA = 'https://www.google-analytics.com/batch';
// Path to log to process
const LOG = '/var/log/httpd/access_log';
// Path to file storing the byte offset of LOG that was already processed
const STATEFILE = '/home/user/.statefile';
// The source host (sent as the document hostname of every hit)
const HOSTNAME = 'example.com';
// Google Analytics ID
const TID = 'UA-XXXXXX-Y';
// Regex to parse a log line.
// The pattern uses the "x" (extended) modifier: unescaped whitespace and
// "#" comments inside it are ignored, so every literal space that must be
// matched is written as "\ ". Do not use a quote or a slash in the comments.
const REGEX_APACHE = '/^(?P<IP>\S+)
    \ (?P<ident>\S+)                      # Usually "-", but may be a longer identity
    \ (?P<auth_user>.*?)                  # Spaces are allowed here, can be empty.
    \ (?P<date>\[[^]]+\])
    \ "(?P<http_start_line>.+\ .+)"       # At least one space: HTTP 0.9
    \ (?P<status_code>[0-9]+)             # Status code is _always_ an integer
    \ (?P<response_size>(?:[0-9]+|-))     # Response size can be -
    \ "(?P<referrer>.*)"                  # Referrer can contain anything: it is just a header
    \ "(?P<user_agent>.*)"$/x';
/**
 * Process new loglines and send them to Google Analytics.
 *
 * Reads LOG starting at the byte offset stored in STATEFILE, parses every
 * line, keeps the ones isPageview() accepts and hands them to sendBatch()
 * in groups of 20. The offset reached is written back to STATEFILE and a
 * one-line summary is appended to this script's own log file.
 *
 * Sending is best effort: a batch that fails is counted in the summary but
 * is not retried, and the stored offset still advances.
 *
 * NOTE: There's a daily server process to archive and reset the log, so a
 * stored offset beyond the current end of LOG restarts reading at byte 0.
 *
 * @return bool true when LOG was processed, false when it could not be opened
 */
function main() {
    $startTime = microtime(true); // For logging script duration
    $scriptLog = "/home/taylorwe/scripts/apache2google.log"; // Destination of this script's own messages
    $batches = 0;     // Batches attempted, for logging
    $failures = 0;    // Batches that could not be delivered, for logging
    $linesRead = 0;   // Loglines consumed during this run
    $batch = array(); // Holds current batch

    // Avoid reprocessing loglines: resume at the stored offset.
    // A missing or unreadable state file simply means "start at the beginning".
    $state = is_readable(STATEFILE) ? file_get_contents(STATEFILE) : false;
    $bytesRead = ($state === false) ? 0 : max(0, (int) trim($state));

    $log = fopen(LOG, "r");
    if ($log === false) {
        // Without this check feof()/fgets() below would be called on a non-resource.
        error_log("[".date("c")."] apache2google.php: Could not open ".LOG.".".PHP_EOL, 3, $scriptLog);
        return false;
    }
    if ($bytesRead > filesize(LOG)) {
        $bytesRead = 0;
    }
    fseek($log, $bytesRead);

    // Sends one batch; a failed or throwing send is recorded instead of being silently dropped.
    $send = function ($hits) use (&$batches, &$failures) {
        $batches += 1;
        try {
            $delivered = sendBatch($hits);
        } catch (Exception $e) {
            $delivered = false;
        }
        if (!$delivered) {
            $failures += 1;
        }
    };

    // fgets() returns false at end of file, which ends the loop.
    while (($line = fgets($log)) !== false) {
        $linesRead += 1;
        $bytesRead += strlen($line);
        $parsedLine = parseLog($line);
        // Lines the regex cannot parse are skipped rather than passed on as false.
        if ($parsedLine === false || !isPageview($parsedLine)) {
            continue;
        }
        $batch[] = $parsedLine;
        if (count($batch) == 20) {
            $send($batch);
            $batch = array();
        }
    }
    fclose($log);

    // Flush the last, partially filled batch.
    if (count($batch) > 0) {
        $send($batch);
    }

    if (file_put_contents(STATEFILE, (string) $bytesRead) === false) {
        error_log("[".date("c")."] apache2google.php: Could not write ".STATEFILE.".".PHP_EOL, 3, $scriptLog);
    }

    // Only report "no new loglines" when nothing at all was read.
    $msg = ($linesRead === 0) ? " No new loglines." : "";
    if ($failures > 0) {
        $msg .= " $failures batches failed to send.";
    }
    $date = date("c");
    $duration = round((microtime(true) - $startTime)*1000, 1);
    error_log("[$date] apache2google.php: Processed $batches batches in $duration milliseconds.$msg".PHP_EOL, 3, $scriptLog);
    return true;
}
/**
 * Build a "Hit" payload from a parsed logline
 *
 * https://ga-dev-tools.appspot.com/hit-builder/
 * v=1
 * &t=pageview // Pageview hit type.
 * &tid=UA-XXXXX-Y // Analytics property
 * &aip=1 // Anonymize IP
 * &cid=555 // Client ID (here: md5 of the visitor IP)
 * &dh=mydemo.com // Document hostname.
 * &dp=/home // Document path.
 * &dt=Home // Document title (derived from the last path segment).
 * &dr=http%3A%2F%2Fexample.com // Document referrer.
 * &uip=1.2.3.4 // IP address override.
 * &ua=Opera/9.80 // User-Agent override.
 * &qt=560 // Queue time: age of the hit in milliseconds.
 *
 * Every value taken from the logline is URL-encoded. Fields that are
 * missing or cannot be interpreted are left out of the hit.
 *
 * @param array $parsedLine Named captures of REGEX_APACHE as returned by parseLog()
 * @return string The hit as a single "key=value&key=value" line
 */
function buildHit($parsedLine) {
    $hit = array("v=1", "t=pageview", "tid=".TID, "dh=".HOSTNAME, "aip=1");

    if (!empty($parsedLine['IP'])) {
        $hit[] = "uip=".urlencode($parsedLine['IP']);
        $hit[] = "cid=".md5($parsedLine['IP']);
    }

    // Report the referrer only when it is present and does not mention our own host.
    // A plain substring test is used so that the dots in HOSTNAME are matched literally.
    $referrer = isset($parsedLine['referrer']) ? $parsedLine['referrer'] : '';
    if ($referrer !== '' && $referrer !== '-' && strpos($referrer, HOSTNAME) === false) {
        $hit[] = "dr=".urlencode($referrer);
    }

    if (!empty($parsedLine['user_agent'])) {
        $hit[] = "ua=".urlencode($parsedLine['user_agent']);
    }

    if (!empty($parsedLine['http_start_line'])) {
        // The start line is "<method> <target> <protocol>"; the target is the second token.
        $tokens = explode(" ", $parsedLine['http_start_line']);
        $target = isset($tokens[1]) ? $tokens[1] : '';
        $parsedUrl = ($target !== '') ? parse_url("https://".HOSTNAME.$target) : false;
        if (is_array($parsedUrl) && isset($parsedUrl['path'])) {
            // Normalize the path to end in "/" and re-attach the query string
            // afterwards, so the slash never ends up behind the query.
            $pagePath = $parsedUrl['path'];
            if (substr($pagePath, -1) !== "/") {
                $pagePath .= "/";
            }
            if (isset($parsedUrl['query'])) {
                $pagePath .= "?".$parsedUrl['query'];
            }
            $hit[] = "dp=".urlencode($pagePath);

            // Title: last path segment with dashes turned into spaces, e.g. "my-post" => "My Post".
            $title = implode(" ", array_map('ucwords', explode("-", basename($parsedUrl['path']))));
            if ($title !== '') {
                $hit[] = "dt=".urlencode($title);
            }
        }
    }

    if (!empty($parsedLine['date'])) {
        $hitTime = strtotime(trim($parsedLine['date'], '[]'));
        // An unparsable date would otherwise produce a queue time of "now" milliseconds.
        if ($hitTime !== false) {
            $queueTime = max(0, time() - $hitTime) * 1000;
            $hit[] = "qt=$queueTime";
        }
    }

    return implode("&", $hit);
}
/**
 * Parse a single logline with REGEX_APACHE.
 *
 * @param string $line Raw line as read from the log
 * @return array|false The preg_match() captures when the line matches, false otherwise
 */
function parseLog($line) {
    $fields = array();
    $matched = preg_match(REGEX_APACHE, $line, $fields);
    return $matched ? $fields : false;
}
/**
 * Exclude non-pages
 *
 * A logline counts as a pageview when its status code is 200 and its URL path
 * - does not end in a segment containing a dot (e.g. "style.css"),
 * - does not end in the segment "feed", and
 * - does not start with "/wp-".
 *
 * Anything that is not a usable parsed line (false from parseLog(), missing
 * fields, no request target, no URL path) is not a pageview.
 *
 * @param array|false $parsedLine Result of parseLog()
 * @return bool
 */
function isPageview($parsedLine) {
    if (!is_array($parsedLine) || !isset($parsedLine['http_start_line'], $parsedLine['status_code'])) {
        return false;
    }
    if ((int) $parsedLine['status_code'] !== 200) {
        return false;
    }

    // The start line is "<method> <target> <protocol>"; the target is the second token.
    $tokens = explode(" ", $parsedLine['http_start_line']);
    if (!isset($tokens[1])) {
        return false;
    }
    $url = parse_url("https://".HOSTNAME.$tokens[1]);
    if (!is_array($url) || !isset($url['path'])) {
        return false;
    }

    $path = $url['path'];
    $isFileOrFeed = preg_match('/(?:^.+\..+$|^feed$)/', basename($path)) === 1;
    $hasWpPrefix = preg_match('/^\/wp-/', $path) === 1;
    return !$isFileOrFeed && !$hasWpPrefix;
}
/**
 * Send a batch of hits to Google Analytics
 *
 * https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#batch
 * POST /batch HTTP/1.1
 * Host: www.google-analytics.com
 *
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fhome
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fabout
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fcontact
 *
 * Each parsed logline is turned into one payload line by buildHit(); the
 * lines are joined with "\n" and POSTed to GA in a single request.
 *
 * @param array $batch Parsed loglines (as returned by parseLog())
 * @return bool true when there was nothing to send or the request completed
 *              with a 2xx status, false otherwise
 */
function sendBatch($batch) {
    // Nothing to send: do not issue an empty request.
    if (empty($batch)) {
        return true;
    }

    $payload = implode("\n", array_map('buildHit', $batch));
    $curlHandle = curl_init(GA);
    if ($curlHandle === false) {
        return false;
    }
    curl_setopt_array($curlHandle, array(
        CURLOPT_POST => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POSTFIELDS => $payload,
        // Bound the request so an unreachable endpoint cannot hang the script.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ));
    $response = curl_exec($curlHandle);
    $status = (int) curl_getinfo($curlHandle, CURLINFO_HTTP_CODE);
    // Close the handle on every path, including failures.
    curl_close($curlHandle);

    return $response !== false && $status >= 200 && $status < 300;
}
main(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment