Brian Abelson (abelsonlive)
# helper functions for filtering data #
def query_jsonpath(line, path):
    from jsonpath import jsonpath
    value = jsonpath(line, path)
    if value:
        return value[0]  # jsonpath returns matches in a list, even for a single hit
    else:
        return None  # fill in null if there's no match for a particular field

def extract_fields(line, config):  # query lines of json w/ jsonpath for specific fields
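The preview cuts off mid-function. A minimal sketch of a body, assuming `config` maps output field names to jsonpath expressions (that mapping is an assumption, not from the original gist):

    # assumption: config is {field_name: jsonpath_expression}
    return dict((field, query_jsonpath(line, path)) for field, path in config.items())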
@abelsonlive
abelsonlive / tweepy.py
Last active December 16, 2015 10:39
connect with twitter's api in python via tweepy
import tweepy

# api keys accessed from http://dev.twitter.com/apps
# (consumer_key, consumer_secret, access_token, and access_token_secret
#  must be set to your own credentials before this runs)
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# now you can do stuff like this:
# get my last 20 tweets
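For example, a sketch using tweepy's user_timeline (the count parameter caps how many tweets come back):

tweets = api.user_timeline(count=20)
for tweet in tweets:
    print tweet.text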
@abelsonlive
abelsonlive / gist:5283723
Created April 1, 2013 07:50
note lookup for micromidi
note,midi
C-1,0
C#-1,1
Db-1,1
D-1,2
D#-1,3
Eb-1,3
E-1,4
F-1,5
F#-1,6
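The table follows the convention where C-1 is MIDI note 0, so each entry is midi = 12 * (octave + 1) + semitone offset. A small sketch (not from the gist) that computes the same mapping:

# compute a MIDI number from note name + octave, matching the table (C-1 -> 0)
SEMITONES = {"C": 0, "C#": 1, "Db": 1, "D": 2, "D#": 3, "Eb": 3,
             "E": 4, "F": 5, "F#": 6, "Gb": 6, "G": 7, "G#": 8,
             "Ab": 8, "A": 9, "A#": 10, "Bb": 10, "B": 11}

def note_to_midi(note, octave):
    return 12 * (octave + 1) + SEMITONES[note]

note_to_midi("C", -1)  # -> 0
note_to_midi("F#", -1)  # -> 6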
# flag posts whose text matches a gun-related regex, then export the matches
d <- read.csv("all_posts.csv", stringsAsFactors=F)
pattern <- "regex here"  # placeholder: fill in the regex to match
d$guns <- 0
d$guns[grep(pattern, d$text)] <- 1
d_guns <- d[d$guns==1,]
write.csv(d_guns, "gun_posts.csv", row.names=F)
@abelsonlive
abelsonlive / gist:4551070
Created January 16, 2013 21:19
tweetbot.py
import requests
import tweepy
import time
import oauth2
import re
from urllib import quote
# THIS IS WHAT YOU CUSTOMIZE #
# terms
LIST = "members-of-congress"
# transformations with ogr2ogr:
# reproject a shapefile to WGS84 lat/lon (EPSG:4326)
ogr2ogr -t_srs EPSG:4326 output_4326.shp input.shp
@abelsonlive
abelsonlive / lda.R
Last active December 9, 2015 23:59
# after successfully installing hiR, start here:
# setup
library("hiR")
# read in every text file to a single character vector #
# set working directory
setwd("DIggOutput")
library("scraply") # also loads XML and plyr
tree <- url2tree("http://www.house.gov/representatives/#name_a")
nodes <- tree2node(tree, select='class="directory"')
data <- ldply(nodes, readHTMLTable)
write.csv(data, "representatives.csv", row.names=FALSE)
@abelsonlive
abelsonlive / allmytweets.py
Created December 13, 2012 17:16
use selenium to get content from allmytweets.net
import codecs
import lxml.html as lh
from selenium import webdriver

# load the page in a real browser so the javascript-rendered tweets appear
browser = webdriver.Firefox()
browser.get('http://www.allmytweets.net/?screen_name=brianabelson')
content = browser.page_source
print content
browser.quit()
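The codecs and lxml imports go unused in the preview. A hedged sketch of how they might be applied, assuming the tweets are rendered as list items (the selector is an assumption):

# parse the captured page source and write the tweet text to disk
doc = lh.fromstring(content)
tweets = [li.text_content().strip() for li in doc.xpath('//li')]
with codecs.open('all_my_tweets.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(tweets))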