Skip to content

Instantly share code, notes, and snippets.

@adamcataldo
Last active December 23, 2015 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamcataldo/6643738 to your computer and use it in GitHub Desktop.
Save adamcataldo/6643738 to your computer and use it in GitHub Desktop.
/**
 * Sub-assembly that narrows a stream of campaign page views down to those
 * belonging to sessions with at least one page-load event, and then applies
 * the {@code IsNotBotPageView} filter to what remains.
 *
 * <p>The single tail of the assembly is the filtered page-view stream.
 *
 * <p>NOTE(review): Cascading 2.x documents that {@code SubAssembly} subclasses
 * should register their incoming pipes via {@code setPrevious(...)}; this class
 * does not. Confirm against the Cascading version in use.
 */
static class HistoryBotRemover extends SubAssembly {

    /**
     * Builds the assembly on two fresh head pipes named
     * {@code "campaignPageViews"} and {@code "userEvents"}.
     */
    private HistoryBotRemover() {
        this(new Pipe("campaignPageViews"), new Pipe("userEvents"));
    }

    /**
     * Wires the assembly onto the supplied upstream pipes.
     *
     * @param campaignPageViews page-view stream; the code reads its
     *        {@code PAGE_VIEW_ID}, {@code SESSION_ID}, {@code USER_ID},
     *        {@code REMOTE_HOST} and {@code HOST} fields
     * @param userEvents event stream; the code reads its {@code CATEGORY}
     *        and {@code PAGE_VIEW_ID} fields
     */
    public HistoryBotRemover(Pipe campaignPageViews, Pipe userEvents) {
        // Step 1: reduce the event stream to a single join-key column.
        // Presumably EqualsValue keeps only events whose CATEGORY is PAGELOAD -- confirm.
        final Pipe pageLoadEvents =
                new Each(userEvents, new Fields(CATEGORY), new EqualsValue(PAGELOAD));
        final Pipe pageLoadIds = new Retain(pageLoadEvents, new Fields(PAGE_VIEW_ID));
        // Renamed so the field does not collide with PAGE_VIEW_ID on the other side of the join.
        final Pipe pageLoadKeys =
                new Rename(pageLoadIds, new Fields(PAGE_VIEW_ID), new Fields(JOIN_FIELD));

        // Step 2: join page views to page-load events, then collapse to distinct session ids.
        final Pipe viewsWithLoads = new CoGroup(
                "loadedSessions",
                campaignPageViews, new Fields(PAGE_VIEW_ID),
                pageLoadKeys, new Fields(JOIN_FIELD));
        final Pipe sessionIds = new Retain(viewsWithLoads, new Fields(SESSION_ID));
        final Pipe distinctSessionIds = new Unique(sessionIds, new Fields(SESSION_ID));
        final Pipe sessionKeys =
                new Rename(distinctSessionIds, new Fields(SESSION_ID), new Fields(JOIN_FIELD));

        // Step 3: left-join every page view against those sessions, then drop the
        // rows whose join key came back null (no matching session) and remove the
        // helper column again.
        final Pipe viewsJoinedToSessions = new CoGroup(
                "loadedPageViews",
                campaignPageViews, new Fields(SESSION_ID),
                sessionKeys, new Fields(JOIN_FIELD),
                new LeftJoin());
        final Pipe matchedViews =
                new Each(viewsJoinedToSessions, new Fields(JOIN_FIELD), new FilterNull());
        final Pipe loadedViews = new Discard(matchedViews, new Fields(JOIN_FIELD));

        // Step 4: apply the bot filter on the user/host columns and expose the result.
        final Pipe humanViews = new Each(
                loadedViews,
                new Fields(USER_ID, REMOTE_HOST, HOST),
                new IsNotBotPageView());
        setTails(humanViews);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment