This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Brian Abelson @brianabelson
# Harmony Institute
# December 5, 2012
# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
# it fits topic models using latent Dirichlet allocation
# it provides arguments for cleaning the input text and tuning the parameters of the model
# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
# this allows you to easily model outcomes based on the distribution of topics within a collection of texts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from facepy import GraphAPI | |
import facepy | |
import re | |
import json | |
# Meta variables: placeholder credentials and the page whose feed is queried.
access_token = 'your_token'  # placeholder; replace with a real access token
page_id = 'the_page'  # input page id here
# Feed query path for the page, with limit=300.
base_query = page_id + '/feed?limit=300'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
leading_zeros <- function(x=NULL, n_digits=NULL) { | |
require("plyr") | |
nchars_array <- laply(x, nchar) | |
nchars <- sort(unique(c(nchars_array), n_digits)) | |
for(i in nchars[-length(nchars)]) { | |
subset <- x[nchars_array==i] | |
zeros <- rep("0", n_digits-i) | |
zeros <- paste(zeros, collapse="") | |
x[nchars_array==i] <- paste0(zeros, subset) | |
return(as.character(x)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## extract one column: $1 is the 1st comma-separated field of each row
awk -F"," '{print $1}' data.csv > column.txt
## extract every row whose 2nd field equals 140: $0 prints the whole row (all columns)
awk -F"," '{if($2==140) print $0}' data.csv > census_subset.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# write the 2nd comma-separated field of every row in census.csv to subset.txt
awk -F"," '{print $2}' census.csv > subset.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Account credentials and the Google login / authentication endpoints.
# NOTE(review): these are hard-coded placeholders; prefer reading real
# credentials from the environment (e.g. Sys.getenv()) rather than the script.
username <- "username@gmail.com"
password <- "password_here"
loginURL <- "https://accounts.google.com/accounts/ServiceLogin"
authenticateURL <- "https://accounts.google.com/accounts/ServiceLoginAuth"

# library() stops with an error if RCurl is missing; require() would only
# return FALSE and let the script fail later at getCurlHandle().
library(RCurl)
# Curl handle created up front; presumably reused by the requests that
# follow (not visible here) -- TODO confirm.
ch <- getCurlHandle()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages this script depends on.
toInstall <- c("proxy")
# `doInstall` is not defined in this excerpt; it must be set before this line.
if (doInstall) {
  install.packages(toInstall, repos = "http://cran.us.r-project.org")
}
# Attach every package listed above by name.
lapply(toInstall, library, character.only = TRUE)
# kmeans++ center initialization algorithm | |
kMeansPP <- function(df, k, doPlot = TRUE){ | |
kCenters <- data.frame(matrix(NA, ncol = ncol(df), nrow = k)) | |
whichPoints <- rep(NA, k) | |
whichPoints[1] <- sample(1:nrow(df), 1) | |
kCenters[1, ] <- df[whichPoints[1], ] # Initial center |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape every URL with llply, handling errors: a URL whose scrape fails is
# reported and contributes NULL to `output` instead of aborting the whole run.
output <- llply(urls, function(url) {
  # silent = TRUE suppresses try()'s own error printout; failures are
  # reported explicitly below.
  out <- try(scrapeCast(url), silent = TRUE)
  if (inherits(out, "try-error")) {
    print(paste("error scraping", url))
    # Return NULL explicitly: otherwise the value of print() (the message
    # string) would become this element of the result list.
    return(NULL)
  }
  out
}, .progress = "text")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cbind.fill: column-bind objects with differing row counts.
#
# Each argument is coerced with as.matrix(), padded at the bottom with NA rows
# up to the largest row count among the inputs, then combined with cbind().
#
# Arguments:
#   ...  objects accepted by as.matrix() (e.g. vectors, matrices, data frames)
# Returns:
#   a matrix with as many rows as the tallest input; NULL when called with
#   no arguments.
cbind.fill <- function(...) {
  nm <- lapply(list(...), as.matrix)
  # Nothing to bind: return what cbind() would, without max() warning on
  # an empty vector.
  if (length(nm) == 0L) {
    return(NULL)
  }
  # vapply guarantees an integer vector regardless of input.
  n <- max(vapply(nm, nrow, integer(1)))
  pad_rows <- function(x) {
    rbind(x, matrix(NA, nrow = n - nrow(x), ncol = ncol(x)))
  }
  do.call(cbind, lapply(nm, pad_rows))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
## include --upsert if adding to a preexisting collection
mongoimport -d db_name -c coll_name --type csv --file file.csv --headerline