Skip to content

Instantly share code, notes, and snippets.

@miike
Created March 14, 2019 22:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save miike/cbe99c2d8c220b548f062dca23cdc6e0 to your computer and use it in GitHub Desktop.
Save miike/cbe99c2d8c220b548f062dca23cdc6e0 to your computer and use it in GitHub Desktop.
Attempt at a Node collector
// this collector code isn't finished
// Module dependencies and Express application bootstrap.
const fs = require('fs');
const path = require('path');
// NOTE(review): bufrw is not referenced anywhere in the code visible here.
const bufrw = require('bufrw');
const Thrift = require('thriftrw').Thrift;
// NOTE(review): only the `Thrift` binding above is used in the visible code; `thriftrw` itself is not.
const thriftrw = require('thriftrw');
// NOTE(review): require() takes a single module id, so the second argument ('4.16.2')
// has no effect; the Express version has to be pinned in package.json instead.
const express = require('express', '4.16.2');
const cookieParser = require('cookie-parser');
const uuidv4 = require('uuid/v4');
const app = express();
// NOTE(review): the visible code relies on the global `Buffer`; this `buffer` binding is not referenced.
const buffer = require('buffer');
const { URL, URLSearchParams } = require('url');
// Register cookie-parser so handlers can read request cookies (getNetworkUserId reads request.cookies).
app.use(cookieParser());
/**
 * Serialises a payload using the CollectorPayload struct declared in payload.thrift.
 *
 * The IDL file is read from disk and parsed on every call.
 *
 * @param {Object} payload - field values handed to the generated CollectorPayload constructor.
 * @returns {string} the encoded struct, decoded as a UTF-8 string.
 */
function thriftify(payload) {
  const idlPath = path.join('payload.thrift');
  const model = new Thrift({
    source: fs.readFileSync(idlPath, 'ascii'),
    strict: false,
    allowOptionalArguments: false,
    defaultAsUndefined: true
  });
  const struct = new model.CollectorPayload(payload);
  const encoded = model.getType('CollectorPayload').toBuffer(struct);
  // NOTE(review): decoding the encoded bytes as UTF-8 may be lossy if the
  // encoding is binary; confirm whether callers actually need the raw bytes.
  return encoded.toString('utf8');
}
// we also need to set the P3P header
// Health check endpoint: replies with the body "OK" (default 200 status).
app.get('/health', (req, res) => {
  res.send('OK');
});
// Serves crossdomain.xml: a Flash cross-domain policy allowing access from any domain.
app.get('/crossdomain.xml', function(req, res) {
  // The declared charset is ISO-8859-1, so the body is encoded as latin1 to match.
  const encoding = 'latin1';
  const policy = "<?xml version=\"1.0\"?>\n<cross-domain-policy>\n <allow-access-from domain=\"*\" secure=\"false\" />\n</cross-domain-policy>";
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
  const flash = Buffer.from(policy, encoding);
  res.setHeader('Content-Type', 'text/xml; charset=ISO-8859-1');
  res.send(flash);
});
// responseHandler
/**
 * Core response handler: resolves the network user id, builds the raw event,
 * sets the common response headers and delegates to buildHttpResponse.
 *
 * @param {string} queryString - raw query string, stored on the event.
 * @param {*} body - request body, stored on the event.
 * @param {string} path - request path; a path starting with '/r/' marks a click redirect.
 * @param {*} cookie - request cookie(s), forwarded to getNetworkUserId.
 * @param {string} userAgent - stored on the event.
 * @param {string} refererUri - stored on the event.
 * @param {string} hostname - stored on the event.
 * @param {string} ip - remote address; stored on the event and used to derive the partition key.
 * @param {Object} request - Express request (its `query` is read here).
 * @param {boolean} pixelExpected - whether the caller expects a tracking pixel in response.
 * @param {string} contentType - forwarded to buildEvent.
 * @param {Object} res - Express response that receives the headers.
 * @returns {Array} `[res, body]` pair: the response object and the body to send.
 */
function cookie1(queryString, body, path, cookie, userAgent, refererUri, hostname, ip, request, pixelExpected, contentType, res) {
  // TODO: ip address and partition keys
  const queryParams = request.query;
  const useIpAddressAsPartitionKey = false;
  // Only the partition key is consumed here; the event keeps the `ip` passed by the caller.
  const [, partitionKey] = ipAndPartitionKey(ip, useIpAddressAsPartitionKey);
  const redirect = path.startsWith('/r/');
  const nuidOpt = getNetworkUserId(request, cookie);
  const cookieBounceName = 'n3pc';

  // We are already bouncing when the bounce parameter is present in the query.
  // The lookup must use the *value* of cookieBounceName, not a property
  // literally called "cookieBounceName".
  const bouncing = typeof queryParams[cookieBounceName] !== 'undefined';
  console.log('Bouncing?:', bouncing);

  // Bounce only when no nuid could be retrieved, we are not already bouncing,
  // a pixel is expected and this is not a click redirect.
  const bounce = typeof nuidOpt === 'undefined' && !bouncing && pixelExpected && !redirect;

  let nuid;
  if (typeof nuidOpt !== 'undefined') {
    nuid = nuidOpt;
  } else if (bouncing) {
    // The bounce already happened and still no id was found: use the fallback id.
    nuid = "fallback";
  } else {
    nuid = uuidv4();
  }

  const ct = contentType; // TODO: this should be lowercased
  const event = buildEvent(
    queryString,
    body,
    path,
    userAgent,
    refererUri,
    hostname,
    ip,
    request,
    nuid,
    ct
  );

  // 1. bounce location header. `res` has to be passed in; without it the
  //    helper returns undefined and every later setHeader call throws.
  bounceLocationHeader(queryParams, request, cookieBounceName, bounce, res);
  // 2. P3P header
  res.setHeader('P3P', 'policyref="/w3c/p3p.xml", CP="NOI DSP COR NID PSA OUR IND COM NAV STA"');
  // 3. access control allow origin header
  res.setHeader('Access-Control-Allow-Origin', accessControlAllowOriginHeader(request));
  // 4. access control allow credentials
  res.setHeader('Access-Control-Allow-Credentials', 'true');

  const headers1 = []; // TODO: not decided yet what should be passed here
  const sink = null; // TODO: to be defined later
  const built = buildHttpResponse(event, partitionKey, queryParams, headers1, redirect, pixelExpected, bounce, sink, res);
  // Accept a bare response object as well as a [response, body] pair so the
  // caller can always destructure the result.
  return Array.isArray(built) ? built : [built, undefined];
}
/**
 * Resolves the network user id for a request.
 *
 * The `nuid` query parameter takes precedence; otherwise the value of the
 * `sp` request cookie is used.
 *
 * @param {Object} request - request exposing `query` and `cookies` objects.
 * @param {*} cookie - currently unused; kept so existing callers keep working.
 * @returns {string|undefined} the network user id, or undefined when neither source has one.
 */
function getNetworkUserId(request, cookie) {
  const cookieName = 'sp';

  // Use nuid as networkUserId if present. (A `switch` on the value with a
  // boolean `case` never matches, so a plain condition is required here.)
  const networkUserIdParam = request.query ? request.query.nuid : undefined;
  if (typeof networkUserIdParam !== 'undefined') {
    return networkUserIdParam;
  }

  // Else reuse the id stored in the `sp` cookie, if the request carries one.
  const cookies = request.cookies || {};
  return cookies[cookieName];
}
/** Empty record type; buildEvent creates an instance and attaches the event fields to it. */
class CollectorPayload {}
/**
 * Builds a raw event from the pieces of an HTTP request.
 *
 * Returns a CollectorPayload carrying the schema, ip address, current
 * timestamp in milliseconds, encoding, collector name and the request-derived fields.
 *
 * NOTE(review): `request` and `contentType` are accepted but not stored on
 * the event; confirm whether that is intentional.
 */
function buildEvent(queryString, body, path, userAgent, refererUri, hostname, ipAddress, request, networkUserId, contentType) {
  return Object.assign(new CollectorPayload(), {
    schema: 'iglu:com.snowplowanalytics.snowplow/CollectorPayload/thrift/1-0-0',
    ipAddress: ipAddress,
    timestamp: Date.now(),
    encoding: 'UTF-8',
    collector: 'node-collector-0.0.1',
    path: path,
    querystring: queryString,
    body: body,
    hostname: hostname,
    networkUserId: networkUserId,
    userAgent: userAgent,
    refererUri: refererUri
  });
}
// end responseHandler
/**
 * Builds the HTTP response for a collected event.
 *
 * Click redirects are delegated to buildRedirectHttpResponse; everything else
 * goes to buildUsualHttpResponse. Both branches return the same
 * `[res, body]` shape so callers can always destructure the result.
 *
 * @param {Object} event - the collected event (forwarded to the redirect builder).
 * @param {string} partitionKey - forwarded to the redirect builder.
 * @param {Object} queryParams - parsed query parameters.
 * @param {Array} headers - currently unused.
 * @param {boolean} redirect - true when the request is a click redirect.
 * @param {boolean} pixelExpected - forwarded to buildUsualHttpResponse.
 * @param {boolean} bounce - forwarded to buildUsualHttpResponse.
 * @param {*} sinkConfig - currently unused.
 * @param {Object} res - Express response object.
 * @returns {Array} `[res, body]`; body is undefined for redirects.
 */
function buildHttpResponse(event, partitionKey, queryParams, headers, redirect, pixelExpected, bounce, sinkConfig, res) {
  if (redirect) {
    // A redirect carries no body; still return a pair to match the other branch.
    return [buildRedirectHttpResponse(event, partitionKey, queryParams, res), undefined];
  }
  // TODO: check that the Kinesis sink isn't shutting down
  // TODO: ensure headers are preserved
  return buildUsualHttpResponse(pixelExpected, bounce, res);
}
/**
 * Builds the response for a regular (non-redirect) request.
 *
 * - pixel expected and bouncing: status 302, no body;
 * - pixel expected and `bounce === false`: a GIF body with an image/gif type;
 * - otherwise: the plain body 'ok'.
 *
 * @param {boolean} pixelExpected - whether a tracking pixel should be returned.
 * @param {boolean} bounce - whether the client is being bounced.
 * @param {Object} res - Express response object (status/type/set are called on it).
 * @returns {Array} `[res, body]` where body is undefined, a Buffer, or 'ok'.
 */
function buildUsualHttpResponse(pixelExpected, bounce, res) {
  if (pixelExpected && bounce) {
    // Bounce: respond with 302 Found and no body. Returning here avoids the
    // old fall-through that referenced an undeclared `body` variable.
    res.status(302);
    return [res, undefined];
  }
  if (pixelExpected && bounce === false) {
    // Respond with the GIF. Buffer.from replaces the deprecated `new Buffer()`.
    const pixel = Buffer.from("R0lGODlhAQABAPAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==", 'base64');
    res.type('image/gif');
    res.set('Content-Disposition', 'inline;');
    return [res, pixel];
  }
  return [res, 'ok'];
}
/**
 * Builds the http response for a click redirect.
 *
 * With a `u` query parameter the response becomes a 302 whose Location header
 * is that value; without it the failure is logged and the status is set to 400.
 * `event` and `partitionKey` are not used yet (reserved for sinking a bad row).
 *
 * @returns {Object} the same `res` that was passed in.
 */
function buildRedirectHttpResponse(event, partitionKey, queryParams, res) {
  const target = queryParams.u;

  if (typeof target === 'undefined') {
    const badRow = "Redirect failed due to lack of u parameter";
    console.log(badRow);
    res.status(400); // bad request
    // TODO: sink a badrow here
    // storeRawEvents(badRow, partitionKey);
    return res;
  }

  res.status(302);
  res.setHeader('Location', target);
  return res;
}
/**
 * Sets a cookie on the response with the network user id as its value.
 *
 * @param {*} cookieConfig - currently unused; name, domain and expiration are
 *   hard-coded below (TODO: read them from config).
 * @param {string} networkUserId - value stored in the cookie.
 * @param {Object} res - Express response; `res.cookie` is called on it.
 * @returns {Object} the same `res`.
 */
function cookieHeader(cookieConfig, networkUserId, res) {
  const expiration = 10000; // milliseconds from now; this should be read from config
  const cookieName = 'test';
  const domain = 'localhost';
  res.cookie(cookieName, networkUserId, {
    // `expires` must be a Date object, not an epoch-millisecond number.
    expires: new Date(Date.now() + expiration),
    domain: domain,
    path: '/'
  });
  return res;
}
/**
 * Sets a Location header redirecting the request to itself, with the bounce
 * parameter appended, to check if third party cookies are blocked.
 *
 * @param {Object} queryParams - parsed query parameters; unused because
 *   `req.originalUrl` is used as-is, query string included.
 * @param {Object} req - Express request (`protocol`, `get('host')`, `originalUrl` are read).
 * @param {string} cookieBounceName - name of the query parameter marking a bounce.
 * @param {boolean} bounce - when falsy nothing is set.
 * @param {Object} res - Express response that receives the header.
 * @returns {Object} the same `res`.
 */
function bounceLocationHeader(queryParams, req, cookieBounceName, bounce, res) {
  if (bounce) {
    // The WHATWG URL class has no static `format`; build the URL with the
    // constructor instead. The pieces are concatenated (rather than resolving
    // originalUrl against a base) so that a path starting with "//" cannot
    // replace the host.
    const redirectUri = new URL(req.protocol + '://' + req.get('host') + req.originalUrl);
    redirectUri.searchParams.append(cookieBounceName, 'true');
    res.setHeader('Location', redirectUri.toString());
  }
  return res;
}
/**
 * Returns the entries of `res.headers` other than 'remote-address' and
 * 'raw-request-uri' (exact, case-sensitive match).
 *
 * NOTE(review): this relies on `res.headers` being an array of header names;
 * confirm against the caller, since none is visible here.
 */
function headers2(res) {
  const excluded = ['remote-address', 'raw-request-uri'];
  return res.headers.filter(function (name) {
    return excluded.indexOf(name) === -1;
  });
}
/**
 * Gets the IP from a remote address together with a partition key.
 *
 * @param {string|undefined} remoteAddress - client address, if known.
 * @param {boolean} ipAsPartitionKey - when true the IP doubles as the
 *   partition key; otherwise a UUID is generated.
 * @returns {Array<string>} `[ip, partitionKey]`; ip is "unknown" and the key a
 *   fresh UUID when no address is available.
 */
function ipAndPartitionKey(remoteAddress, ipAsPartitionKey) {
  // The "unknown" fallback applies only when there is NO address (the previous
  // check was inverted and the other branch read an undeclared `ip` variable).
  if (typeof remoteAddress === 'undefined' || remoteAddress === null) {
    return ["unknown", uuidv4()];
  }
  return [remoteAddress, (ipAsPartitionKey ? remoteAddress : uuidv4())];
}
/**
 * Gets the network user id from the query string or the request cookie.
 *
 * @param {Object} req - request exposing parsed query parameters as `req.query`.
 * @param {Object|undefined} requestCookie - cookie object whose `value` holds the id.
 * @returns {string|undefined} the `nuid` query parameter if present, else the
 *   cookie value, else undefined.
 */
function networkUserId(req, requestCookie) {
  // Parsed query parameters are read from `req.query`, the same place
  // getNetworkUserId reads them from.
  const nuid = req.query ? req.query.nuid : undefined;
  if (typeof nuid !== 'undefined') {
    return nuid;
  }
  // A request without the cookie must not crash the lookup.
  return requestCookie ? requestCookie.value : undefined;
}
/**
 * Produces the value for the Access-Control-Allow-Origin header: the request's
 * own Origin header when it has one, '*' otherwise.
 */
function accessControlAllowOriginHeader(req) {
  const requestOrigin = req.get('Origin');
  if (typeof requestOrigin === 'undefined') {
    return '*';
  }
  return requestOrigin;
}
// Placeholder for assembling a bad row ready for sinking; not implemented yet,
// so it always yields undefined.
function createBadRow() {
  return undefined;
}
// Begin routes
// Pixel endpoint: collects the request details, hands them to cookie1 and
// sends whatever body it returns.
app.get('/i', function(req, res) {
  const qs = req._parsedUrl.query;
  const path = req.path;
  // Request cookies are read from `req.cookies`.
  const cookie = req.cookies;
  const userAgent = req.headers['user-agent'];
  const refererURI = req.get('Referrer');
  const host = req.hostname;
  const ip = req.ip;
  // TODO: 'ct' is a placeholder content type.
  const [response, body] = cookie1(qs, null, path, cookie, userAgent, refererURI, host, ip, req, true, 'ct', res);
  // Do not force the status here: the helpers may already have set 302
  // (bounce / redirect) or 400, and an untouched response defaults to 200.
  response.send(body);
});
// Preflight response: answers OPTIONS on any path with the CORS headers and an empty body.
app.options('*', (req, res) => {
  const corsHeaders = {
    'Access-Control-Allow-Origin': accessControlAllowOriginHeader(req),
    'Access-Control-Allow-Headers': 'content-type',
    'Access-Control-Allow-Credentials': 'true'
  };
  Object.keys(corsHeaders).forEach((name) => {
    res.setHeader(name, corsHeaders[name]);
  });
  res.send();
});
// end routes
// Catch-all registered after the routes: anything that reaches it gets a 404.
app.use((req, res, next) => {
  res.status(404).send("404 Not found");
});

// Switch off the x-powered-by setting.
app.set('x-powered-by', false);

// Start listening on port 3000 and log once the server is up.
app.listen(3000, () => {
  console.log('Example app listening on port 3000');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment