Skip to content

Instantly share code, notes, and snippets.

@nattaylor
Created November 5, 2019 02:30
Show Gist options
  • Save nattaylor/da176e1b4a107a6f218859af101fa0cd to your computer and use it in GitHub Desktop.
Save nattaylor/da176e1b4a107a6f218859af101fa0cd to your computer and use it in GitHub Desktop.
Server-side Google Analytics
<?php
/**
* Send Apache Logs to Google Analytics
*/
// Google Analytics Measurement Protocol batch endpoint the hits are POSTed to
const GA = 'https://www.google-analytics.com/batch';
// Path to the Apache access log to process
const LOG = '/var/log/httpd/access_log';
// Path to the file storing the byte offset of LOG that was already processed
const STATEFILE = '/home/user/.statefile';
// The source host (sent as the document hostname and used to rebuild full URLs)
const HOSTNAME = 'example.com';
// Google Analytics ID
const TID = 'UA-XXXXXX-Y';
// Regex to parse a logLine.
// The "x" flag makes PCRE ignore unescaped whitespace and "#" comments inside
// the pattern, which is why every literal space is written as "\ ".
// Keep the inline comments free of single quotes and slashes: they would end
// the PHP string or the regex delimiter early.
const REGEX_APACHE = '/^(?P<IP>\S+)
\ (?P<ident>\S+) # Usually a lone dash, but an identd name can be longer
\ (?P<auth_user>.*?) # Spaces are allowed here, can be empty.
\ (?P<date>\[[^]]+\])
\ "(?P<http_start_line>.+ .+)" # At least one space: HTTP 0.9
\ (?P<status_code>[0-9]+) # Status code is _always_ an integer
\ (?P<response_size>(?:[0-9]+|-)) # Response size can be -
\ "(?P<referrer>.*)" # Referrer can contains everything: its just a header
\ "(?P<user_agent>.*)"$/x';
/**
 * Process new loglines and send to Google Analytics
 *
 * Reads LOG starting at the byte offset saved in STATEFILE, sends every line
 * that parses and counts as a pageview to Google Analytics in batches of up to
 * 20 hits, stores the new offset in STATEFILE and appends a one-line summary
 * to the script log.
 *
 * NOTE: There's a daily server process to archive and reset the log
 *
 * @return bool true once the log was processed, false if LOG could not be opened
 */
function main() {
    $startTime = microtime(true); // For logging script duration
    $batches = 0; // Batches handed to sendBatch(), for logging
    $failed = 0; // Batches that sendBatch() reported as not delivered
    $batch = array(); // Holds current batch
    $msg = "";
    $scriptLog = "/home/taylorwe/scripts/apache2google.log";

    // Sends one batch and reports success; sending stays best-effort, so a
    // failure is only counted, never fatal.
    $send = function ($hits) {
        try {
            return sendBatch($hits) !== false;
        } catch (Exception $e) {
            return false;
        }
    };

    // Avoid reprocessing loglines: resume at the saved offset. A missing or
    // unreadable state file (first run) means "start at the beginning".
    $saved = is_readable(STATEFILE) ? file_get_contents(STATEFILE) : false;
    $bytesRead = ($saved === false) ? 0 : max(0, (int) trim($saved));

    $log = fopen(LOG, "r");
    if ($log === false) {
        error_log("[".date("c")."] apache2google.php: Could not open ".LOG.".".PHP_EOL, 3, $scriptLog);
        return false;
    }

    // A saved offset beyond the end of the file means the log was reset
    // since the last run, so start over from the top.
    $stat = fstat($log);
    if ($stat !== false && $bytesRead > $stat['size']) {
        $bytesRead = 0;
    }
    $startByte = $bytesRead;
    fseek($log, $bytesRead);

    // fgets() returns false at end of file, which ends the loop.
    while (($line = fgets($log)) !== false) {
        $bytesRead += strlen($line);
        $parsedLine = parseLog($line);
        // Skip lines that do not match the log format or are not pageviews.
        if ($parsedLine === false || !isPageview($parsedLine)) {
            continue;
        }
        $batch[] = $parsedLine;
        if (count($batch) == 20) {
            $batches += 1;
            if (!$send($batch)) {
                $failed += 1;
            }
            $batch = array();
        }
    }
    fclose($log);

    if ($bytesRead === $startByte) {
        $msg = " No new loglines.";
    }
    if (file_put_contents(STATEFILE, $bytesRead) === false) {
        $msg .= " Could not write ".STATEFILE.".";
    }

    // Send whatever is left over from the last, partially filled batch.
    if (count($batch) > 0) {
        $batches += 1;
        if (!$send($batch)) {
            $failed += 1;
        }
    }
    if ($failed > 0) {
        $msg .= " $failed batch(es) failed to send.";
    }

    $date = date("c");
    $duration = round((microtime(true) - $startTime)*1000, 1);
    error_log("[$date] apache2google.php: Processed $batches batches in $duration milliseconds.$msg".PHP_EOL, 3, $scriptLog);
    return true;
}
/**
 * Build a "Hit" payload from a parsed logline
 *
 * https://ga-dev-tools.appspot.com/hit-builder/
 * v=1
 * &t=pageview // Pageview hit type.
 * &tid=UA-XXXXX-Y // Analytics property
 * &cid=555 // Client ID (here: md5 of the client IP)
 * &aip=1 // Anonymize IP
 * &dh=mydemo.com // Document hostname.
 * &dp=/home // Document path.
 * &dt=Home // Document title (derived from the last path segment).
 * &dr=http%3A%2F%2Fexample.com // Document referrer.
 * &uip=1.2.3.4 // IP address override.
 * &ua=Opera/9.80 // User-Agent override.
 * &qt=560 // Queue time: milliseconds between the request and now.
 *
 * @param array $parsedLine Named matches of REGEX_APACHE; the keys IP,
 *                          referrer, user_agent, http_start_line and date are read
 * @return string The hit as one "&"-joined line of key=value pairs
 */
function buildHit($parsedLine) {
    $hit = array("v=1", "t=pageview", "tid=".TID, "dh=".HOSTNAME, "aip=1");
    if ($parsedLine['IP']) {
        array_push($hit, "uip=".$parsedLine['IP']);
        array_push($hit, "cid=".md5($parsedLine['IP']));
    }
    // Leave out empty ("-") and self referrers. The hostname is quoted so its
    // dots only match literal dots instead of any character.
    $ownReferrer = '/(?:^-$|'.preg_quote(HOSTNAME, '/').')/';
    if (preg_match($ownReferrer, $parsedLine['referrer']) !== 1) {
        array_push($hit, "dr=".urlencode($parsedLine['referrer']));
    }
    if ($parsedLine['user_agent']) {
        array_push($hit, "ua=".urlencode($parsedLine['user_agent']));
    }
    if ($parsedLine['http_start_line']) {
        // The start line is "<method> <target> <protocol>"; the target is the second word.
        $words = explode(" ", $parsedLine['http_start_line']);
        $path = isset($words[1]) ? $words[1] : '';
        $parsedUrl = parse_url("https://".HOSTNAME.$path);
        $urlPath = (is_array($parsedUrl) && isset($parsedUrl['path'])) ? $parsedUrl['path'] : '';
        // Title: last path segment with dashes turned into spaces, e.g. "my-post" => "My Post".
        $dt = implode(" ", array_map('ucwords', explode("-", basename($urlPath))));
        $dp = substr($path, -1) == "/" ? urlencode($path) : urlencode("$path/");
        array_push($hit, "dp=$dp");
        // The title can contain spaces or "&", so it has to be encoded like every other value.
        array_push($hit, "dt=".urlencode($dt));
    }
    if ($parsedLine['date']) {
        $requestTime = strtotime(trim($parsedLine['date'], '[]'));
        // Only report a queue time for dates that parse, and never a negative one.
        if ($requestTime !== false) {
            $datediff = max(0, time() - $requestTime) * 1000;
            array_push($hit, "qt=$datediff");
        }
    }
    return implode("&", $hit);
}
/**
 * Split one access log line into its named fields
 *
 * @param string $line A raw line from the access log
 * @return array|false The matches of REGEX_APACHE, or false if the line does not match
 */
function parseLog($line) {
    $fields = array();
    $matched = preg_match(REGEX_APACHE, $line, $fields);
    return $matched ? $fields : false;
}
/**
 * Exclude non-pages
 *
 * A logline counts as a pageview when its status code is 200 and its path is
 * neither a file (last segment contains a dot), a feed, nor under /wp-.
 *
 * @param array|false $parsedLine Result of parseLog(); false (unparseable line) is never a pageview
 * @return bool true when the logline should be reported as a pageview
 */
function isPageview($parsedLine) {
    // parseLog() returns false for lines that do not match the log format.
    if (!is_array($parsedLine) || !isset($parsedLine['http_start_line'], $parsedLine['status_code'])) {
        return false;
    }
    if ((int) $parsedLine['status_code'] !== 200) {
        return false;
    }
    // The start line is "<method> <target> <protocol>"; the target is the second word.
    $words = explode(" ", $parsedLine['http_start_line']);
    $target = isset($words[1]) ? $words[1] : '';
    $url = parse_url("https://".HOSTNAME.$target);
    if ($url === false) {
        // A target that cannot be parsed as a URL cannot be reported as a page.
        return false;
    }
    $path = isset($url['path']) ? $url['path'] : '';
    $isFileOrFeed = preg_match('/(?:^.+\..+$|^feed$)/', basename($path)) == 1;
    $isWordpressInternal = preg_match('/^\/wp-/', $path) == 1;
    return !$isFileOrFeed && !$isWordpressInternal;
}
/**
 * Send a batch of hits to Google Analytics
 *
 * https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#batch
 * POST /batch HTTP/1.1
 * Host: www.google-analytics.com
 *
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fhome
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fabout
 * v=1&tid=UA-XXXXX-Y&cid=555&t=pageview&dp=%2Fcontact
 *
 * @param array $batch Parsed loglines; each one becomes one payload line via buildHit()
 * @return bool true when the request completed (or there was nothing to send),
 *              false when cURL could not be set up or the transfer failed
 */
function sendBatch($batch) {
    // Nothing to report: do not POST an empty body.
    if (count($batch) === 0) {
        return true;
    }
    $payload = implode("\n", array_map('buildHit', $batch));
    $curlHandle = curl_init(GA);
    if ($curlHandle === false) {
        return false;
    }
    curl_setopt_array($curlHandle, array(
        CURLOPT_POST => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POSTFIELDS => $payload,
        // Without timeouts a stalled connection would block the script indefinitely.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ));
    $response = curl_exec($curlHandle);
    // Release the handle on every path, including a failed transfer.
    curl_close($curlHandle);
    return $response !== false;
}
// Entry point: process the log as soon as the script is executed.
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment