Created
March 14, 2019 22:01
-
-
Save miike/cbe99c2d8c220b548f062dca23cdc6e0 to your computer and use it in GitHub Desktop.
Attempt at a Node collector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// this collector code isn't finished | |
const fs = require('fs'); | |
const path = require('path'); | |
const bufrw = require('bufrw'); | |
const Thrift = require('thriftrw').Thrift; | |
const thriftrw = require('thriftrw'); | |
const express = require('express', '4.16.2'); | |
const cookieParser = require('cookie-parser'); | |
const uuidv4 = require('uuid/v4'); | |
const app = express(); | |
const buffer = require('buffer'); | |
const { URL, URLSearchParams } = require('url'); | |
app.use(cookieParser()); | |
/**
 * Serialises a payload with the `CollectorPayload` struct declared in
 * `payload.thrift`.
 *
 * The IDL file is read synchronously (as ASCII, via a relative path) and
 * parsed again on every call.
 *
 * @param {Object} payload - Field values handed to the generated
 *   `CollectorPayload` constructor.
 * @returns {string} The serialised buffer decoded as a UTF-8 string.
 */
function thriftify(payload) {
  const idlPath = path.join('payload.thrift');
  const thrift = new Thrift({
    source: fs.readFileSync(idlPath, 'ascii'),
    strict: false,
    allowOptionalArguments: false,
    defaultAsUndefined: true
  });
  const struct = new thrift.CollectorPayload(payload);
  const encoded = thrift.getType('CollectorPayload').toBuffer(struct);
  // NOTE(review): decoding a binary buffer as UTF-8 may be lossy — confirm
  // what the consumer of this string expects.
  return encoded.toString('utf8');
}
// we also need to set the P3P header | |
// Health-check endpoint: replies with the text 'OK' (default status 200).
app.get('/health', (req, res) => {
  res.send('OK');
});
// Serves the Flash cross-domain policy file, encoded as ISO-8859-1.
app.get('/crossdomain.xml', (req, res) => {
  const encoding = 'latin1'; // Node's name for ISO-8859-1
  const policy = [
    '<?xml version="1.0"?>',
    '<cross-domain-policy>',
    ' <allow-access-from domain="*" secure="false" />',
    '</cross-domain-policy>'
  ].join('\n');
  res.setHeader('Content-Type', 'text/xml; charset=ISO-8859-1');
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
  res.send(Buffer.from(policy, encoding));
});
// responseHandler | |
/**
 * Handles one tracking request: resolves the network user id, builds the raw
 * event, sets the bounce/P3P/CORS headers and produces the response body.
 *
 * @param {string} queryString - Raw query string, stored on the event.
 * @param {*} body - Request body, stored on the event.
 * @param {string} path - Request path; a path starting with '/r/' is treated
 *   as a click redirect.
 * @param {*} cookie - Forwarded to getNetworkUserId.
 * @param {string} userAgent - Stored on the event.
 * @param {string} refererUri - Stored on the event.
 * @param {string} hostname - Stored on the event.
 * @param {string} ip - Client address; stored on the event and used to derive
 *   the partition key.
 * @param {Object} request - The request object (`query`, `get()` are used).
 * @param {boolean} pixelExpected - Whether the caller expects a pixel response.
 * @param {string} [contentType] - Request content type; lower-cased before use.
 * @param {Object} res - The response object the headers are written to.
 * @returns {Array} A `[res, body]` pair.
 */
function cookie1(queryString, body, path, cookie, userAgent, refererUri, hostname, ip, request, pixelExpected, contentType, res) {
  // TODO: ip address and partition keys
  const queryParams = request.query;
  const useIpAddressAsPartitionKey = false;
  const [, partitionKey] = ipAndPartitionKey(ip, useIpAddressAsPartitionKey);
  const redirect = path.startsWith('/r/');
  const nuidOpt = getNetworkUserId(request, cookie);
  const cookieBounceName = 'n3pc';

  // We are already bouncing when the bounce parameter itself is present in the
  // query string. The lookup must use the parameter's NAME ('n3pc'), not a
  // property literally called "cookieBounceName".
  const bouncing = typeof queryParams[cookieBounceName] !== 'undefined';
  console.log('Bouncing?:', bouncing);

  // Bounce only if no nuid could be retrieved, we are not already bouncing,
  // a pixel is expected and this is not a click redirect.
  const bounce = typeof nuidOpt === 'undefined' && !bouncing && pixelExpected && !redirect;

  let nuid;
  if (typeof nuidOpt !== 'undefined') {
    nuid = nuidOpt;
  } else if (bouncing) {
    // The bounce came back without a cookie: use the fixed fallback id.
    nuid = "fallback";
  } else {
    nuid = uuidv4();
  }

  // Content types are compared case-insensitively, so normalise to lower case.
  const ct = typeof contentType === 'string' ? contentType.toLowerCase() : contentType;

  const event = buildEvent(
    queryString,
    body,
    path,
    userAgent,
    refererUri,
    hostname,
    ip,
    request,
    nuid,
    ct
  );

  // 1. bounce location header — `res` must be passed, otherwise the helper
  //    returns undefined and every header call below would throw.
  bounceLocationHeader(queryParams, request, cookieBounceName, bounce, res);
  // 2. P3P header
  res.setHeader('P3P', 'policyref="/w3c/p3p.xml", CP="NOI DSP COR NID PSA OUR IND COM NAV STA"');
  // 3. access control allow origin header
  res.setHeader('Access-Control-Allow-Origin', accessControlAllowOriginHeader(request));
  // 4. access control allow credentials
  res.setHeader('Access-Control-Allow-Credentials', 'true');

  const headers1 = []; // TODO: not yet clear what this should contain
  const sink = null; // TODO: sink configuration is not defined yet
  const result = buildHttpResponse(event, partitionKey, queryParams, headers1, redirect, pixelExpected, bounce, sink, res);

  // Always hand back a [res, body] pair, even if the builder returned a bare
  // response object.
  return Array.isArray(result) ? result : [result, ''];
}
/**
 * Resolves the network user id for a request.
 *
 * Precedence: the `nuid` query parameter, then the `sp` cookie.
 *
 * @param {Object} request - Request exposing `query` and `cookies`.
 * @param {*} cookie - Unused; kept so existing call sites stay valid.
 * @returns {string|undefined} The id, or undefined when neither source has one.
 */
function getNetworkUserId(request, cookie) {
  const cookieName = 'sp';

  // The previous `switch (value) { case <boolean>: ... }` compared the
  // parameter against true/false and therefore never matched a real id.
  const networkUserIdParam = request.query ? request.query.nuid : undefined;
  if (typeof networkUserIdParam !== 'undefined') {
    return networkUserIdParam;
  }

  // Otherwise reuse the id from the `sp` cookie, if the request carries one.
  return request.cookies ? request.cookies[cookieName] : undefined;
}
// Plain record for one collected event; buildEvent assigns its fields.
class CollectorPayload {
}

/**
 * Builds a raw event from the parts of an HTTP request.
 *
 * @param {string} queryString - Stored as `querystring`.
 * @param {*} body - Stored as `body`.
 * @param {string} path - Stored as `path`.
 * @param {string} userAgent - Stored as `userAgent`.
 * @param {string} refererUri - Stored as `refererUri`.
 * @param {string} hostname - Stored as `hostname`.
 * @param {string} ipAddress - Stored as `ipAddress`.
 * @param {Object} request - Currently unused.
 * @param {string} networkUserId - Stored as `networkUserId`.
 * @param {string} [contentType] - Stored as `contentType`.
 * @returns {CollectorPayload} The populated event.
 */
function buildEvent(queryString, body, path, userAgent, refererUri, hostname, ipAddress, request, networkUserId, contentType) {
  const event = new CollectorPayload();
  event.schema = 'iglu:com.snowplowanalytics.snowplow/CollectorPayload/thrift/1-0-0';
  event.ipAddress = ipAddress;
  event.timestamp = Date.now(); // milliseconds since the epoch
  event.encoding = 'UTF-8';
  event.collector = 'node-collector-0.0.1';
  event.path = path;
  event.querystring = queryString;
  event.body = body;
  event.hostname = hostname;
  event.networkUserId = networkUserId;
  event.userAgent = userAgent;
  event.refererUri = refererUri;
  // Previously accepted as a parameter but silently dropped.
  event.contentType = contentType;
  return event;
}
// end responseHandler | |
/**
 * Chooses between the click-redirect response and the usual response.
 *
 * @param {Object} event - The raw event (forwarded to the redirect builder).
 * @param {string} partitionKey - Forwarded to the redirect builder.
 * @param {Object} queryParams - Parsed query parameters.
 * @param {Array} headers - Currently unused.
 * @param {boolean} redirect - True for click-redirect requests.
 * @param {boolean} pixelExpected - Forwarded to buildUsualHttpResponse.
 * @param {boolean} bounce - Forwarded to buildUsualHttpResponse.
 * @param {*} sinkConfig - Currently unused.
 * @param {Object} res - The response object to populate.
 * @returns {Array} A `[res, body]` pair in both branches.
 */
function buildHttpResponse(event, partitionKey, queryParams, headers, redirect, pixelExpected, bounce, sinkConfig, res) {
  if (redirect) {
    // Callers destructure `[res, body]`, so the redirect branch must return a
    // pair too; a redirect carries an empty body.
    return [buildRedirectHttpResponse(event, partitionKey, queryParams, res), ''];
  }
  // TODO: check that the Kinesis sink isn't shutting down
  // TODO: ensure headers are preserved here
  return buildUsualHttpResponse(pixelExpected, bounce, res);
}
/**
 * Builds the response for a non-redirect request.
 *
 * - pixel expected and bouncing: status 302 with an empty body
 * - pixel expected and `bounce === false`: a 1x1 GIF
 * - otherwise: the text 'ok'
 *
 * @param {boolean} pixelExpected - Whether the client expects a pixel.
 * @param {boolean} bounce - Whether the client is being bounced.
 * @param {Object} res - Response object (`status`, `type`, `set` are used).
 * @returns {Array} A `[res, body]` pair.
 */
function buildUsualHttpResponse(pixelExpected, bounce, res) {
  if (pixelExpected && bounce) {
    // Found: previously this branch fell through to `return [res, body]` with
    // `body` never declared.
    res.status(302);
    return [res, ''];
  }
  if (pixelExpected && bounce === false) {
    // Base64 of a 1x1 GIF. Buffer.from replaces the deprecated `new Buffer`.
    const pixel = Buffer.from("R0lGODlhAQABAPAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==", 'base64');
    res.type('image/gif');
    res.set('Content-Disposition', 'inline;');
    return [res, pixel];
  }
  return [res, 'ok'];
}
/**
 * Builds the HTTP response for a click redirect.
 *
 * With a `u` query parameter the response becomes a 302 whose Location is that
 * value; without it the failure is logged and the status is set to 400.
 *
 * @param {Object} event - Unused here.
 * @param {string} partitionKey - Unused here (reserved for sinking bad rows).
 * @param {Object} queryParams - Parsed query parameters; `u` is the target.
 * @param {Object} res - Response object (`status`, `setHeader` are used).
 * @returns {Object} The same `res`.
 */
function buildRedirectHttpResponse(event, partitionKey, queryParams, res) {
  const target = queryParams.u;

  if (typeof target === 'undefined') {
    const badRow = "Redirect failed due to lack of u parameter";
    console.log(badRow);
    res.status(400); // bad request
    // TODO: sink a badrow here, e.g. storeRawEvents(badRow, partitionKey)
    return res;
  }

  res.status(302);
  res.setHeader('Location', target);
  return res;
}
/**
 * Sets a cookie on the response with the network user id as its value.
 *
 * @param {*} cookieConfig - Currently unused; the settings below are hard-coded.
 * @param {string} networkUserId - Value stored in the cookie.
 * @param {Object} res - Response object exposing `cookie(name, value, options)`.
 * @returns {Object} The same `res`.
 */
function cookieHeader(cookieConfig, networkUserId, res) {
  // TODO: read name, domain and expiration from cookieConfig
  const expiration = 10000; // added to Date.now(), i.e. milliseconds

  res.cookie('test', networkUserId, {
    // `expires` must be a Date; a bare epoch number is rejected when the
    // cookie is serialised.
    expires: new Date(Date.now() + expiration),
    domain: 'localhost',
    path: '/'
  });
  return res;
}
/**
 * Sets a Location header that redirects the client back to the same URL with
 * the bounce parameter appended, to check whether third-party cookies are
 * blocked. Does nothing when `bounce` is falsy.
 *
 * @param {Object} queryParams - Unused here: the existing query string is
 *   taken from `req.originalUrl`. Kept so existing call sites stay valid.
 * @param {Object} req - Request exposing `protocol`, `get('host')` and
 *   `originalUrl`.
 * @param {string} cookieBounceName - Name of the query parameter to append.
 * @param {boolean} bounce - Whether the Location header should be set.
 * @param {Object} res - Response object exposing `setHeader`.
 * @returns {Object} The same `res`.
 */
function bounceLocationHeader(queryParams, req, cookieBounceName, bounce, res) {
  if (bounce) {
    // The WHATWG URL class has no static `format`; resolve the original
    // path + query against the request's own origin instead.
    const redirectUri = new URL(req.originalUrl, `${req.protocol}://${req.get('host')}`);
    redirectUri.searchParams.append(cookieBounceName, 'true');
    res.setHeader('Location', redirectUri.toString());
  }
  return res;
}
/**
 * Returns the headers of a message without `remote-address` and
 * `raw-request-uri`.
 *
 * @param {Object} res - Object carrying a `headers` property. Despite the
 *   parameter name, the intent is the incoming request's headers.
 * @returns {Array|Object} For an array of header names, a filtered array; for
 *   a name-to-value object, a new object without the excluded names (matched
 *   case-insensitively). An empty object when `headers` is missing.
 */
function headers2(res) {
  const excluded = ['remote-address', 'raw-request-uri'];
  const headers = res.headers;

  if (Array.isArray(headers)) {
    return headers.filter((header) => !excluded.includes(header));
  }

  // A header object has no `.filter`; copy the entries that are allowed.
  const kept = {};
  Object.keys(headers || {}).forEach((name) => {
    if (!excluded.includes(name.toLowerCase())) {
      kept[name] = headers[name];
    }
  });
  return kept;
}
/**
 * Derives the IP address and the partition key from a remote address.
 *
 * @param {string} [remoteAddress] - Client address, if known.
 * @param {boolean} ipAsPartitionKey - When true the address doubles as the
 *   partition key; otherwise a fresh UUID is generated.
 * @returns {Array<string>} `[ip, partitionKey]`; `["unknown", <uuid>]` when
 *   the address is missing.
 */
function ipAndPartitionKey(remoteAddress, ipAsPartitionKey) {
  // The check used to be inverted: a known address produced "unknown", and a
  // missing one hit an undefined `ip` variable.
  if (remoteAddress === undefined || remoteAddress === null) {
    return ["unknown", uuidv4()];
  }
  return [remoteAddress, ipAsPartitionKey ? remoteAddress : uuidv4()];
}
/**
 * Gets the network user id from the query string or the request cookie.
 *
 * @param {Object} req - Request exposing the parsed `query`.
 * @param {Object} [requestCookie] - Cookie object whose `value` is the id.
 * @returns {string|undefined} The `nuid` query parameter when present,
 *   otherwise the cookie's value (undefined when there is no cookie).
 */
function networkUserId(req, requestCookie) {
  // `req._parsedUrl` has no `nuid` property; the parameter lives in the parsed
  // query object.
  const nuid = req.query ? req.query.nuid : undefined;
  if (typeof nuid !== 'undefined') {
    return nuid;
  }
  return requestCookie ? requestCookie.value : undefined;
}
/**
 * Computes the Access-Control-Allow-Origin value for a request: the request's
 * own Origin header when it has one, otherwise the wildcard.
 *
 * @param {Object} req - Request exposing `get(headerName)`.
 * @returns {string} The Origin header value, or '*'.
 */
function accessControlAllowOriginHeader(req) {
  const requestOrigin = req.get('Origin');
  if (typeof requestOrigin === 'undefined') {
    return '*';
  }
  return requestOrigin;
}
// Puts together a bad row ready for sinking.
// Placeholder: nothing is built yet, so the result is undefined.
function createBadRow() {
  return undefined;
}
// Begin routes | |
// Pixel endpoint: hands the request to cookie1 and sends whatever body it
// produced.
app.get('/i', (req, res) => {
  // Destructure into fresh bindings instead of reassigning `res` and leaking
  // an implicit global `body`.
  const [response, body] = cookie1(
    req._parsedUrl.query,
    null,
    req.path,
    req.cookies, // cookie-parser populates `cookies`, not `cookie`
    req.headers['user-agent'],
    req.get('Referrer'),
    req.hostname,
    req.ip,
    req,
    true,
    req.get('Content-Type'),
    res
  );
  // Do not force a 200 here: that would overwrite a 302/400 set while the
  // response was being built.
  response.send(body);
});
// CORS preflight: answers every OPTIONS request with the allow-* headers and
// an empty body.
app.options('*', (req, res) => {
  const preflightHeaders = {
    'Access-Control-Allow-Origin': accessControlAllowOriginHeader(req),
    'Access-Control-Allow-Headers': 'content-type',
    'Access-Control-Allow-Credentials': 'true'
  };
  Object.keys(preflightHeaders).forEach((name) => {
    res.setHeader(name, preflightHeaders[name]);
  });
  res.send();
});
// end routes | |
// Fallback for any request that no route above handled.
app.use((req, res) => {
  res.status(404).send("404 Not found");
});

// Turn off the X-Powered-By setting.
app.set('x-powered-by', false);

app.listen(3000, () => {
  console.log('Example app listening on port 3000');
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment