Skip to content

Instantly share code, notes, and snippets.

View abelsonlive's full-sized avatar
🕳️
[ o o ]

Brian Abelson abelsonlive

🕳️
[ o o ]
View GitHub Profile
@abelsonlive
abelsonlive / lda.R
Created December 6, 2012 17:55
topic modeling in R
# Brian Abelson @brianabelson
# Harmony Institute
# December 5, 2012
# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
# it fits topic models using latent dirichlet allocation
# it provides arguments for cleaning the input text and tuning the parameters of the model
# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
# this allows you to easily model outcomes based on the distribution of topics within a collection of texts
@abelsonlive
abelsonlive / entarteur.py
Created December 5, 2012 05:35
use facepy for facebook feed dumps
from facepy import GraphAPI
import facepy
import re
import json
# meta variables: settings used to build the feed query for one page
access_token = 'your_token'  # placeholder -- replace with a real access token
page_id = 'the_page' # input page id here
# query path for the page's feed, with the limit parameter set to 300
base_query = page_id + '/feed?limit=300'
@abelsonlive
abelsonlive / leading_zeros.R
Created December 1, 2012 03:51
Add leading zeros to columns
# Left-pad the elements of a vector with zeros up to a fixed width.
#
# Arguments:
#   x        vector of values to pad; it is coerced to character first.
#   n_digits target width in characters (a single non-NA number). When NULL,
#            nothing is padded and x is only coerced to character.
#
# Returns:
#   A character vector of the same length as x. Elements that already have
#   n_digits or more characters, and NA elements, are returned unchanged.
#
# The width comparison is done for every element at once with nchar(), so
# the target width no longer has to appear among the observed widths for the
# longest elements to be padded, and over-long elements no longer trigger a
# negative repeat count.
leading_zeros <- function(x = NULL, n_digits = NULL) {
  x <- as.character(x)
  if (length(x) == 0L || is.null(n_digits)) {
    return(x)
  }
  stopifnot(is.numeric(n_digits), length(n_digits) == 1L, !is.na(n_digits))

  widths <- nchar(x)
  # nchar() is NA for NA strings; leave those untouched rather than
  # turning them into the literal text "NA" with zeros in front.
  needs_pad <- !is.na(widths) & widths < n_digits
  x[needs_pad] <- paste0(
    strrep("0", n_digits - widths[needs_pad]),
    x[needs_pad]
  )
  x
}
@abelsonlive
abelsonlive / subset.sh
Created November 30, 2012 21:42
use awk to subset csv by query
## awk reads data.csv directly instead of through `cat |`, so a missing or
## unreadable input file is reported by awk's own exit status.
## NOTE: splitting on "," assumes no quoted fields that contain commas.
## extract one column ($1 = 1st column)
awk -F"," '{print $1}' data.csv > column.txt
## extract every row whose 2nd column equals 140 ($0 = the whole row, i.e. all columns)
awk -F"," '{if($2==140) print $0}' data.csv > census_subset.csv
@abelsonlive
abelsonlive / subset.sh
Created November 30, 2012 21:23
use awk to subset csv
## extract the 2nd column of census.csv; awk reads the file directly so a
## missing input file shows up in the command's exit status
awk -F"," '{print $2}' census.csv > subset.txt
@abelsonlive
abelsonlive / insights.R
Created November 30, 2012 01:35
curl postform / getform example
# Login details and endpoints used for the Google account sign-in requests.
# The credentials are placeholders -- replace them before running and do not
# commit real values.
username <- "username@gmail.com"
password <- "password_here"
loginURL <- "https://accounts.google.com/accounts/ServiceLogin"
authenticateURL <- "https://accounts.google.com/accounts/ServiceLoginAuth"
# library() stops immediately if RCurl is not installed; require() would only
# return FALSE and let getCurlHandle() fail later with a less helpful error.
library(RCurl)
# curl handle; presumably shared by the later postForm/getForm calls
ch <- getCurlHandle()
@abelsonlive
abelsonlive / kMeansPP.R
Created November 29, 2012 15:29 — forked from dsparks/kMeansPP.R
k-Means ++ center initialization algorithm
toInstall <- c("proxy")
if(doInstall){install.packages(toInstall, repos = "http://cran.us.r-project.org")}
lapply(toInstall, library, character.only = TRUE)
# kmeans++ center initialization algorithm
kMeansPP <- function(df, k, doPlot = TRUE){
kCenters <- data.frame(matrix(NA, ncol = ncol(df), nrow = k))
whichPoints <- rep(NA, k)
whichPoints[1] <- sample(1:nrow(df), 1)
kCenters[1, ] <- df[whichPoints[1], ] # Initial center
@abelsonlive
abelsonlive / scrapeply.R
Created November 20, 2012 03:47
scrape with llply, avoiding errors
# scrape with llply, handling errors
# Each url is passed to scrapeCast(); when that call fails, a message is
# printed and NULL is stored for that url so one bad page does not abort
# the whole run. Successful results are stored unchanged.
output <- llply(urls, function(url) {
  tryCatch(
    scrapeCast(url),
    error = function(e) {
      print(paste("error scraping", url))
      # return NULL explicitly: print() would otherwise hand back the message
      # string as this url's result
      NULL
    }
  )
}, .progress = "text")
@abelsonlive
abelsonlive / cbind.fill.R
Created November 19, 2012 18:12
cbind.fill.R
# yay!
# Column-bind objects that have different numbers of rows, padding the
# shorter ones with NA rows at the bottom.
#
# Arguments:
#   ...  objects to combine; each one is converted with as.matrix(), so a
#        plain vector becomes a one-column matrix.
#
# Returns:
#   The cbind() of the padded matrices, with as many rows as the tallest
#   input. Called with no arguments it returns NULL, like cbind().
cbind.fill <- function(...) {
  mats <- lapply(list(...), as.matrix)
  if (length(mats) == 0L) {
    return(NULL)
  }
  # vapply() guarantees an integer vector here, whatever the inputs are
  row_counts <- vapply(mats, nrow, integer(1))
  target_rows <- max(row_counts)
  padded <- lapply(mats, function(m) {
    # append the missing rows as NA; a zero-row filler is a no-op
    rbind(m, matrix(NA, target_rows - nrow(m), ncol(m)))
  })
  do.call(cbind, padded)
}
@abelsonlive
abelsonlive / mongo_import.sh
Created November 17, 2012 09:36
import csv into mongodb from command line
#!/usr/bin/env bash
## import file.csv into collection coll_name of database db_name, taking the
## field names from the first line of the file (--headerline)
## include --upsert if adding to a preexisting collection
mongoimport -d db_name -c coll_name --type csv --file file.csv --headerline