Benn Stancil bstancil

## mcknight_scrape.py
import requests
import pandas as pd

from bs4 import BeautifulSoup

def get_details(grant):
	details = grant.findAll("div")

	amount   = details[0].text.strip()
	year     = details[1].text.strip()

## gist:90b5c4d64ff0699cc0d4
-- DAILY
-- Counts distinct user_id and distinct anonymous_id values in segment.tracks,
-- bucketed by the day of sent_at (one output row per day, in ascending order).

SELECT DATE_TRUNC('day', sent_at) AS day,
       COUNT(DISTINCT user_id) AS users, -- These are people who are logged in
       COUNT(DISTINCT anonymous_id) AS visitors -- These are all visitors, including those who might be logged out
  FROM segment.tracks
 GROUP BY 1 -- positional reference to the first select column (day)
 ORDER BY 1

-- WEEKLY

## sessions.sql
SELECT *,
       MIN(context_campaign_medium) OVER (PARTITION BY user_id ORDER BY occurred_at) AS first_channel,
       SUM(new_session) OVER (ORDER BY user_id, occurred_at) AS global_session_id,
       SUM(new_session) OVER (PARTITION BY user_id ORDER BY occurred_at) AS user_session_id
  FROM (
        SELECT *,
               CASE WHEN EXTRACT('EPOCH' FROM occurred_at)
                         - EXTRACT('EPOCH' FROM last_event) >= (60 * 10)
                      OR last_event IS NULL
                    THEN 1 ELSE 0 END AS is_new_session

## gist:57434bec14b5e5966da8

    <!--Load the AJAX API-->
    <script type="text/javascript" src="https://www.google.com/jsapi"></script>
    <script type="text/javascript">

      // Get the HTML body before calling google.load
      var HTML = document.body.innerHTML;

      // Load the Visualization API and the controls package.
      google.load('visualization', '1.0', {'packages':['controls']});

## Multiple bridges.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bstancil
                / Multiple bridges.md
            
            
              Last active
              January 13, 2016 19:58
            
          
    Using Bridge with Multiple-Orgs

Currently, Bridge can only act on behalf of a single Organization at a time; however, it is possible to run multiple instances of Bridge on a server. To do so, we'll have to make some modifications to the system configuration.
Assumptions:

Linux/Ubuntu 14.04
Organization #1: RobotOrg
Organization #2: HumanOrg

Step 1 - Select RobotOrg in Mode and follow the normal Add-Data flow for a Data Source on a private network.

  
## Universal user id.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                bstancil
                / Universal user id.md
            
            
              Last active
              January 13, 2016 14:57
            
          
    A few thoughts on creating a "universal user ID":
Unfortunately, this isn't a trivial task. There are some general ways that you can get close, but Segment's data is rarely clean enough that you can create a perfect mapping.
Ultimately, the problem stems from the fact that Segment's two basic ways of tracking identity - anonymous ids and user ids - overlap in both directions. Anonymous ids, which are basically browser cookies, can map to multiple user ids if multiple people log in from the same browser. And user ids can map to multiple anonymous ids if a user uses multiple browsers.
This means that there are some cases when you can't conclusively say which user an anonymous id represents. There are ways to make best guesses, but it's not certain.
First, I create a big table of all the user_ids and anonymous_ids that we've tracked. How you do this exactly depends a little bit on your implementation of Segment, but we do something like this:

  
## the best bachelorette.py
import csv
import requests
import nltk, string
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer

stemmer = nltk.stem.porter.PorterStemmer()
remove_punctuation_map = dict((ord(char), None) for char in string.punctuation)

def stem_tokens(tokens):

## Schema.R

V(g)$label.cex <- 3 * (0.06125 * V(g)$degree / max(V(g)$degree) + .2)
V(g)$label.color <- rgb(0, 0, .2, .49 * V(g)$degree / max(V(g)$degree) + .5)
V(g)$frame.color <- rgb(0, 0, .2, .39 * V(g)$degree / max(V(g)$degree) + .6)
egam <- (log(E(g)$weight)+.4) / max(log(E(g)$weight)+.4)
E(g)$color <- rgb((colorRamp(c("blue", "yellow", "red"))(E(g)$weight/max(E(g)$weight)))/255)
E(g)$width <- egam
plot(g, layout=layout_on_sphere(g), vert

## gist:71a49cad0e877b4ec096
<link href="https://cdn.rawgit.com/jaz303/tipsy/master/src/stylesheets/tipsy.css" rel="stylesheet" type="text/css">
<style>

#wrapper {
  font-family: Helvetica, Arial, sans-serif;
  font-size: 12px;
  width: 900px;
  margin: 0 auto;
  padding-bottom: 10px;
}

## gist:554c0f5de49d046ce70b
SIMPLE VERSION

## Snippet

SELECT rr.id,
       rr.created_at,
       rr.report_id,
       rr.account_id,
       rr.executed_by_id,
       rr.data_source_id,
	import requests
	import pandas as pd

	from bs4 import BeautifulSoup

	def get_details(grant):
	details = grant.findAll("div")

	amount = details[0].text.strip()
	year = details[1].text.strip()
	-- DAILY

	SELECT DATE_TRUNC('day', sent_at) AS day,
	COUNT(DISTINCT user_id) AS users, -- These are people who are logged in
	COUNT(DISTINCT anonymous_id) AS visitors -- These are all visitors, including those who might be logged out
	FROM segment.tracks
	GROUP BY 1
	ORDER BY 1

	-- WEEKLY
	SELECT *,
	MIN(context_campaign_medium) OVER (PARTITION BY user_id ORDER BY occurred_at) AS first_channel,
	SUM(new_session) OVER (ORDER BY user_id, occurred_at) AS global_session_id,
	SUM(new_session) OVER (PARTITION BY user_id ORDER BY occurred_at) AS user_session_id
	FROM (
	SELECT *,
	CASE WHEN EXTRACT('EPOCH' FROM occurred_at)
	- EXTRACT('EPOCH' FROM last_event) >= (60 * 10)
	OR last_event IS NULL
	THEN 1 ELSE 0 END AS is_new_session

	<!--Load the AJAX API-->
	<script type="text/javascript" src="https://www.google.com/jsapi"></script>
	<script type="text/javascript">

	// Get the HTML body before calling google.load
	var HTML = document.body.innerHTML;

	// Load the Visualization API and the controls package.
	google.load('visualization', '1.0', {'packages':['controls']});
	import csv
	import requests
	import nltk, string
	from bs4 import BeautifulSoup
	from sklearn.feature_extraction.text import TfidfVectorizer

	stemmer = nltk.stem.porter.PorterStemmer()
	remove_punctuation_map = dict((ord(char), None) for char in string.punctuation)

	def stem_tokens(tokens):

	V(g)$label.cex <- 3 * (0.06125 * V(g)$degree / max(V(g)$degree) + .2)
	V(g)$label.color <- rgb(0, 0, .2, .49 * V(g)$degree / max(V(g)$degree) + .5)
	V(g)$frame.color <- rgb(0, 0, .2, .39 * V(g)$degree / max(V(g)$degree) + .6)
	egam <- (log(E(g)$weight)+.4) / max(log(E(g)$weight)+.4)
	E(g)$color <- rgb((colorRamp(c("blue", "yellow", "red"))(E(g)$weight/max(E(g)$weight)))/255)
	E(g)$width <- egam
	plot(g, layout=layout_on_sphere(g), vert
	<link href="https://cdn.rawgit.com/jaz303/tipsy/master/src/stylesheets/tipsy.css" rel="stylesheet" type="text/css">
	<style>

	#wrapper {
	font-family: Helvetica, Arial, sans-serif;
	font-size: 12px;
	width: 900px;
	margin: 0 auto;
	padding-bottom: 10px;
	}
	SIMPLE VERSION

	## Snippet

	SELECT rr.id,
	rr.created_at,
	rr.report_id,
	rr.account_id,
	rr.executed_by_id,
	rr.data_source_id,