Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Attempt at a Node collector
// this collector code isn't finished
// Node core modules.
const fs = require('fs');
const path = require('path');
// Thrift tooling. thriftify() uses the Thrift class; bufrw and the bare thriftrw
// binding are not referenced anywhere in the code visible here.
const bufrw = require('bufrw');
const Thrift = require('thriftrw').Thrift;
const thriftrw = require('thriftrw');
// NOTE(review): require() takes a single module id, so the '4.16.2' argument has no
// effect — pin the Express version in package.json instead.
const express = require('express', '4.16.2');
const cookieParser = require('cookie-parser');
const uuidv4 = require('uuid/v4');
// The single Express application every route below is registered on.
const app = express();
// NOTE(review): `buffer` and `URLSearchParams` are not referenced in the visible code;
// Buffer itself is used through the Node global.
const buffer = require('buffer');
const { URL, URLSearchParams } = require('url');
// Registers cookie-parser so handlers can read request cookies (getNetworkUserId reads request.cookies).
app.use(cookieParser());
/**
 * Serialises a payload using the CollectorPayload struct declared in payload.thrift.
 *
 * The schema file is read and parsed again on every call.
 *
 * @param {Object} payload - field values handed to the generated CollectorPayload constructor.
 * @returns {string} the encoded buffer decoded as a UTF-8 string.
 */
function thriftify(payload) {
  // A bare file name: resolved against the process working directory.
  const schemaText = fs.readFileSync(path.join('payload.thrift'), 'ascii');
  const model = new Thrift({
    source: schemaText,
    strict: false,
    allowOptionalArguments: false,
    defaultAsUndefined: true,
  });
  const struct = new model.CollectorPayload(payload);
  const encoded = model.getType('CollectorPayload').toBuffer(struct);
  // NOTE(review): decoding binary output as UTF-8 may be lossy — confirm what the consumer expects.
  return encoded.toString('utf8');
}
// we also need to set the P3P header
// Health endpoint: replies with the text 'OK' (default status 200).
app.get('/health', (req, res) => {
  res.send('OK');
});
// Serves the Flash cross-domain policy file, encoded as ISO-8859-1 (latin1).
app.get('/crossdomain.xml', (req, res) => {
  const policy = [
    '<?xml version="1.0"?>',
    '<cross-domain-policy>',
    ' <allow-access-from domain="*" secure="false" />',
    '</cross-domain-policy>',
  ].join('\n');
  res.setHeader('Content-Type', 'text/xml; charset=ISO-8859-1');
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
  res.send(Buffer.from(policy, 'latin1'));
});
// responseHandler
/**
 * Core request handler: resolves the network user id, decides whether to bounce,
 * builds the raw event, sets the common response headers and builds the response.
 *
 * @param {string|null} queryString - raw query string of the request.
 * @param {*} body - request body (null for GET pixels).
 * @param {string} path - request path; paths starting with '/r/' are click redirects.
 * @param {*} cookie - request cookie value, forwarded to getNetworkUserId.
 * @param {string} userAgent - User-Agent header value.
 * @param {string} refererUri - referer header value.
 * @param {string} hostname - request host name.
 * @param {string} ip - remote address.
 * @param {Object} request - the Express request.
 * @param {boolean} pixelExpected - whether the caller expects the tracking pixel.
 * @param {string} contentType - request content type (TODO: should be lower-cased).
 * @param {Object} res - the Express response to decorate.
 * @returns {Array} a `[response, body]` pair.
 */
function cookie1(queryString, body, path, cookie, userAgent, refererUri, hostname, ip, request, pixelExpected, contentType, res) {
  // TODO: ip address and partition keys
  const queryParams = request.query;
  const useIpAddressAsPartitionKey = false;
  const [, partitionKey] = ipAndPartitionKey(ip, useIpAddressAsPartitionKey);
  const redirect = path.startsWith('/r/');
  const nuidOpt = getNetworkUserId(request, cookie);
  const cookieBounceName = 'n3pc';

  // Look the marker up by its *value* ('n3pc'); `queryParams.cookieBounceName`
  // would test a parameter literally named "cookieBounceName".
  const bouncing = typeof queryParams[cookieBounceName] !== 'undefined';
  console.log('Bouncing?:', bouncing);

  // We bounce if we couldn't retrieve the nuid, we're not already bouncing,
  // a pixel is expected and this is not a click redirect.
  const bounce = typeof nuidOpt === 'undefined' && !bouncing && pixelExpected && !redirect;

  let nuid;
  if (typeof nuidOpt !== 'undefined') {
    nuid = nuidOpt;
  } else if (bouncing) {
    // Came back from a bounce still without a cookie.
    nuid = "fallback";
  } else {
    nuid = uuidv4();
  }

  const event = buildEvent(
    queryString,
    body,
    path,
    userAgent,
    refererUri,
    hostname,
    ip,
    request,
    nuid,
    contentType
  );

  // TODO: sink the event, and set the network user id cookie on the response.

  // 1. bounce location header — `res` must be passed through, otherwise the
  //    helper returns undefined and every setHeader call below throws.
  const response = bounceLocationHeader(queryParams, request, cookieBounceName, bounce, res);
  // 2. P3P header
  response.setHeader('P3P', 'policyref="/w3c/p3p.xml", CP="NOI DSP COR NID PSA OUR IND COM NAV STA"');
  // 3. access control allow origin header
  response.setHeader('Access-Control-Allow-Origin', accessControlAllowOriginHeader(request));
  // 4. access control allow credentials
  response.setHeader('Access-Control-Allow-Credentials', 'true');

  const headers1 = []; // TODO: decide what request headers should be forwarded
  const sink = null; // TODO: to be defined once a sink exists
  const built = buildHttpResponse(event, partitionKey, queryParams, headers1, redirect, pixelExpected, bounce, sink, response);
  // Tolerate a bare response object so callers can always destructure a pair.
  return Array.isArray(built) ? built : [built, undefined];
}
/**
 * Resolves the network user id for a request.
 *
 * Precedence: the `nuid` query parameter, then the `sp` request cookie.
 *
 * @param {Object} request - Express request; `query` and `cookies` are read.
 * @param {*} cookie - unused; kept for interface compatibility.
 * @returns {string|undefined} the id, or undefined when neither source has one.
 */
function getNetworkUserId(request, cookie) {
  const cookieName = 'sp';
  const networkUserIdParam = (request.query || {}).nuid;
  // A plain `if`: the previous `switch (value) { case typeof value !== 'undefined': }`
  // compared the string against a boolean and therefore never matched.
  if (typeof networkUserIdParam !== 'undefined') {
    return networkUserIdParam;
  }
  // Else reuse the id stored in the `sp` cookie, if any.
  const cookies = request.cookies || {};
  return cookies[cookieName];
}
/** Empty record type; buildEvent attaches the payload fields as plain properties. */
class CollectorPayload {}
/**
 * Builds a raw event from the pieces of an HTTP request.
 *
 * `request` and `contentType` are accepted but not copied onto the event.
 *
 * @returns {CollectorPayload} a payload stamped with the current time in epoch milliseconds.
 */
function buildEvent(queryString, body, path, userAgent, refererUri, hostname, ipAddress, request, networkUserId, contentType) {
  return Object.assign(new CollectorPayload(), {
    schema: 'iglu:com.snowplowanalytics.snowplow/CollectorPayload/thrift/1-0-0',
    ipAddress,
    timestamp: Date.now(),
    encoding: 'UTF-8',
    collector: 'node-collector-0.0.1',
    path,
    querystring: queryString,
    body,
    hostname,
    networkUserId,
    userAgent,
    refererUri,
  });
}
// end responseHandler
/**
 * Builds the HTTP response for an event: a click redirect when `redirect` is set,
 * otherwise the usual pixel / bounce / 'ok' response.
 *
 * @param {Object} event - the raw event (forwarded to the redirect builder).
 * @param {string} partitionKey - sink partition key (forwarded to the redirect builder).
 * @param {Object} queryParams - parsed query parameters.
 * @param {Array} headers - currently unused.
 * @param {boolean} redirect - whether this is a click redirect.
 * @param {boolean} pixelExpected - whether the client expects the tracking pixel.
 * @param {boolean} bounce - whether the client is being bounced.
 * @param {*} sinkConfig - currently unused.
 * @param {Object} res - the Express response.
 * @returns {Array} always a `[response, body]` pair; body is undefined for redirects.
 */
function buildHttpResponse(event, partitionKey, queryParams, headers, redirect, pixelExpected, bounce, sinkConfig, res) {
  if (redirect) {
    // Return a pair here too: callers destructure `[res, body]`, which would
    // throw on a bare response object.
    const redirectResponse = buildRedirectHttpResponse(event, partitionKey, queryParams, res);
    return [redirectResponse, undefined];
  }
  // TODO: check that the Kinesis sink isn't shutting down
  // Headers already set on `res` are preserved because the same object is passed along.
  const [usualResponse, usualBody] = buildUsualHttpResponse(pixelExpected, bounce, res);
  return [usualResponse, usualBody];
}
/**
 * Builds the non-redirect response.
 *
 * - pixel expected and bouncing: status 302, no body
 * - pixel expected and `bounce === false`: the 1x1 transparent GIF
 * - anything else: the text 'ok'
 *
 * @param {boolean} pixelExpected - whether the client expects the tracking pixel.
 * @param {boolean} bounce - whether the client is being bounced.
 * @param {Object} res - the Express response.
 * @returns {Array} a `[response, body]` pair.
 */
function buildUsualHttpResponse(pixelExpected, bounce, res) {
  if (pixelExpected && bounce) {
    res.status(302);
    // Previously fell through to `return [res, body]` with `body` undeclared,
    // which throws (or leaks a stale global body in sloppy mode).
    return [res, undefined];
  }
  if (pixelExpected && bounce === false) {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
    const pixel = Buffer.from("R0lGODlhAQABAPAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==", 'base64');
    res.type('image/gif');
    res.set('Content-Disposition', 'inline;');
    return [res, pixel];
  }
  return [res, 'ok'];
}
/**
 * Builds the response for a click redirect.
 *
 * With a `u` query parameter: status 302 and a Location header set to its value.
 * Without one: logs the failure and sets status 400.
 *
 * @param {Object} event - unused for now.
 * @param {string} partitionKey - unused until bad rows are sunk.
 * @param {Object} queryParams - parsed query parameters; `u` holds the target.
 * @param {Object} res - the Express response.
 * @returns {Object} the same response object.
 */
function buildRedirectHttpResponse(event, partitionKey, queryParams, res) {
  const target = queryParams.u;
  if (typeof target === 'undefined') {
    const failure = "Redirect failed due to lack of u parameter";
    console.log(failure);
    res.status(400); // bad request
    // TODO: sink a bad row here (keyed by partitionKey)
    return res;
  }
  res.status(302);
  res.setHeader('Location', target);
  return res;
}
/**
 * Sets a cookie on the response with the network user id as its value.
 *
 * @param {*} cookieConfig - currently unused; name, domain and lifetime are hard-coded.
 * @param {string} networkUserId - value stored in the cookie.
 * @param {Object} res - the Express response.
 * @returns {Object} the same response object.
 */
function cookieHeader(cookieConfig, networkUserId, res) {
  const expiration = 10000; // milliseconds; this should be read from config
  // NOTE(review): getNetworkUserId reads a cookie named 'sp' — confirm which name is intended.
  const cookieName = 'test';
  const domain = 'localhost';
  const path = '/';
  res.cookie(cookieName, networkUserId, {
    // Express expects a Date for `expires`; a bare epoch number is rejected.
    expires: new Date(Date.now() + expiration),
    domain,
    path,
  });
  return res;
}
/**
 * When bouncing, sets a Location header that redirects the client back to the
 * same URL with `<cookieBounceName>=true` appended, so a second request can
 * reveal whether third-party cookies are blocked.
 *
 * @param {Object} queryParams - unused; the query string is taken from `req.originalUrl`.
 * @param {Object} req - Express request; `protocol`, `get('host')` and `originalUrl` are read.
 * @param {string} cookieBounceName - name of the marker query parameter.
 * @param {boolean} bounce - whether to set the header at all.
 * @param {Object} res - the Express response.
 * @returns {Object} the same response object.
 */
function bounceLocationHeader(queryParams, req, cookieBounceName, bounce, res) {
  if (bounce) {
    // The WHATWG URL class has no static `format`; build the absolute URL by
    // resolving the original path + query against the request's origin.
    const origin = `${req.protocol}://${req.get('host')}`;
    const redirectUri = new URL(req.originalUrl, origin);
    redirectUri.searchParams.append(cookieBounceName, 'true');
    res.setHeader('Location', redirectUri.toString());
  }
  return res;
}
/**
 * Returns the entries of `res.headers` except 'remote-address' and 'raw-request-uri'.
 *
 * NOTE(review): this relies on `res.headers` being an array of header names;
 * an object-shaped header map has no `filter` method — confirm what callers pass.
 *
 * @param {Object} res - object carrying a `headers` array.
 * @returns {Array} the filtered entries.
 */
function headers2(res) {
  const excluded = ['remote-address', 'raw-request-uri'];
  return res.headers.filter((name) => !excluded.includes(name));
}
/**
 * Derives the IP string and the sink partition key from a remote address.
 *
 * @param {string|undefined} remoteAddress - the client's address, if known.
 * @param {boolean} ipAsPartitionKey - use the IP as partition key instead of a random UUID.
 * @returns {Array<string>} an `[ip, partitionKey]` pair; the ip is "unknown" when no
 *   address is available.
 */
function ipAndPartitionKey(remoteAddress, ipAsPartitionKey) {
  // The check was inverted (known addresses were reported as "unknown") and the
  // other branch read an undeclared `ip` variable.
  if (remoteAddress === undefined || remoteAddress === null) {
    return ["unknown", uuidv4()];
  }
  return [remoteAddress, ipAsPartitionKey ? remoteAddress : uuidv4()];
}
/**
 * Gets the network user id from the query string or, failing that, the request cookie.
 *
 * @param {Object} req - Express request; the parsed `query` object is read.
 * @param {Object|undefined} requestCookie - cookie object whose `value` holds the id.
 * @returns {string|undefined} the id, or undefined when neither source has one.
 */
function networkUserId(req, requestCookie) {
  // Read the parsed query parameters; `req._parsedUrl` is a URL record and
  // never carries a `nuid` property.
  const nuid = (req.query || {}).nuid;
  if (typeof nuid !== 'undefined') {
    return nuid;
  }
  // Guard against a missing cookie instead of throwing on `.value`.
  return requestCookie ? requestCookie.value : undefined;
}
/**
 * Computes the Access-Control-Allow-Origin value for a request: the request's own
 * Origin header when it has one, '*' otherwise.
 *
 * @param {Object} req - Express request (only `get('Origin')` is used).
 * @returns {*} the Origin header value, or '*' when it is undefined.
 */
function accessControlAllowOriginHeader(req) {
  const requestOrigin = req.get('Origin');
  if (typeof requestOrigin === 'undefined') {
    return '*';
  }
  return requestOrigin;
}
/**
 * Placeholder for assembling a bad row ready for sinking.
 *
 * @returns {undefined} nothing yet — not implemented.
 */
function createBadRow() {
  return undefined;
}
// Begin routes
// GET /i — pixel tracking endpoint: hands the request to cookie1 and sends
// whatever body it produced.
app.get('/i', (req, res) => {
  // Raw query string as cached on the request by Express' URL parsing
  // (null when the request has none).
  const qs = req._parsedUrl.query;
  const [response, body] = cookie1(
    qs,
    null, // GET requests carry no body
    req.path,
    req.cookies, // cookie-parser populates `cookies`; `req.cookie` is always undefined
    req.headers['user-agent'],
    req.get('Referrer'),
    req.hostname,
    req.ip,
    req,
    true, // a pixel is expected on this endpoint
    'ct',
    res
  );
  // Do not force status 200 here: that would overwrite a 302 (bounce) set
  // earlier in the pipeline. 200 remains the default.
  response.send(body);
});
// CORS preflight: answers any OPTIONS request with the allow-* headers and an empty body.
app.options('*', (req, res) => {
  const preflightHeaders = [
    ['Access-Control-Allow-Origin', accessControlAllowOriginHeader(req)],
    ['Access-Control-Allow-Headers', 'content-type'],
    ['Access-Control-Allow-Credentials', 'true'],
  ];
  preflightHeaders.forEach(([name, value]) => {
    res.setHeader(name, value);
  });
  res.send();
});
// end routes
// Fallback for any request that no route above handled.
app.use((req, res) => {
  res.status(404).send("404 Not found");
});

// Turn off the X-Powered-By response header.
app.set('x-powered-by', false);

// Start listening on port 3000.
app.listen(3000, () => {
  console.log('Example app listening on port 3000');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.