Skip to content

Instantly share code, notes, and snippets.

@thomasjensen
thomasjensen / ebtrust.r
Created February 8, 2012 22:14
Eurobarometer survey
library(ggplot2)
# Work from the directory that holds ebAll.csv and load it.
setwd("/path/to/file/")
data <- read.csv("ebAll.csv")

# Parse the date column; the format string expects a two-digit year,
# then month and day, separated by slashes.
data$date <- as.Date(data$date, format = "%y/%m/%d")

# The three answer-share columns get the same treatment: strip every "%"
# character and convert what is left to numeric.
for (share_col in c("Tend.to.trust", "Tend.not.to.trust", "DK...Don.t.know")) {
  data[[share_col]] <- as.numeric(gsub("%", "", data[[share_col]]))
}
rm(share_col)
@thomasjensen
thomasjensen / textmining.r
Created January 26, 2012 14:40
Text mining of Politiken
##read in the libraries and set the working directory
library(tm)
library(corrplot)
setwd("/path/to/")
##read in the data and subset it to the relevant categories
data <- read.csv("indvandringPolitikken.csv", fileEncoding = "latin1")
##%in% never returns NA, so rows with a missing kategori are dropped;
##indexing with `==` would turn each of them into an all-NA row
data <- data[data$kategori %in% c("Politik", "Debat", "Kronikken", "Leder"), ]
##create the corpus and clean it
@thomasjensen
thomasjensen / rbloggerAnalysis.r
Created January 6, 2012 21:13
analysing the data scraped from r-bloggers.com
#load the libraries
library(plyr)
library(ggplot2)
library(xtable)
#set the working directory to where you saved the output.csv file from the previous post
setwd("/.../")
#read the data from output.csv into a data frame
data <- read.csv("output.csv")
@thomasjensen
thomasjensen / extract.py
Created January 5, 2012 18:27
extract information from files downloaded with download.py
from BeautifulSoup import BeautifulSoup
import os
import re
# Directory whose contents are scanned for saved post pages.
path = "/Users/thomasjensen/Documents/RBloggersScrape/download"

# Keep only the entries whose name contains "post<digits>.html".
listing = [
    name
    for name in os.listdir(path)
    if re.search(r"post\d+\.html", name) is not None
]

# Make that directory the current working directory.
os.chdir(path)
@thomasjensen
thomasjensen / download.py
Created January 5, 2012 00:15
Download blog posts from R-bloggers
from BeautifulSoup import BeautifulSoup
import mechanize
import time
# Address of the R-bloggers front page.
url = "http://www.r-bloggers.com/"
# Create a mechanize browser object and use it to open the front page;
# the value returned by open() is kept in `page`.
br = mechanize.Browser()
page = br.open(url)
@thomasjensen
thomasjensen / ft.r
Created December 21, 2011 21:39
first look at voting data from the Danish parliament
library(plyr)
library(ggplot2)
setwd("/.../")
data <- read.csv("ft.csv")

# Split the rows on the Amendment flag: 0 goes to data.final,
# 1 goes to data.amendment.
data.final <- data[data$Amendment == 0, ]
data.amendment <- data[data$Amendment == 1, ]

# For every Year/Month combination among the Amendment == 0 rows,
# count the number of distinct Voteid values.
activity <- ddply(
  .data = data.final,
  .variables = c("Year", "Month"),
  .fun = function(votes) {
    data.frame(count = length(unique(votes$Voteid)))
  }
)
@thomasjensen
thomasjensen / vectorize.r
Created December 21, 2011 08:05
vectorized function for getting outliers in rows of a matrix
data <- cbind(rnorm(100), rnorm(100), rnorm(100))

# Vectorized search for the entry of each row that lies furthest from the
# mean of that row.
#
# Args:
#   mat: a numeric matrix (anything as.matrix() turns into one).
#
# Returns:
#   A matrix with one row per row of `mat`, in row order, and three columns:
#   `row` (row index), `col` (column of the entry with the largest absolute
#   deviation from the row mean; the first such column on ties) and `val`
#   (that absolute deviation).
outlierMat <- function(mat) {
  mat <- as.matrix(mat)
  # rowMeans() has one value per row; recycling down the columns subtracts
  # each row's own mean from every entry of that row.
  devs <- abs(mat - rowMeans(mat))
  # The maximum is taken over the deviations, not over the raw values.
  val <- apply(devs, 1, max)
  # max.col() works row by row, so the result stays aligned with `val`
  # (which(..., arr.ind = TRUE) would report positions in column-major order
  # and return extra rows on ties).
  col <- max.col(devs == val, ties.method = "first")
  cbind(row = seq_len(nrow(mat)), col = col, val = val)
}
@thomasjensen
thomasjensen / simconf.r
Created December 19, 2011 23:40
Simulate fake data to assess model fit
#set the working directory and load the foreign library
setwd("/.../")
library(foreign)
#read the data and drop the rows where onset equals 4
#NOTE(review): the original comment described this as removing missing values
#of the dependent variable - confirm that 4 is the missing-value code
data <- read.dta("repdata.dta")
data <- data[data$onset != 4, ]
#estimate the model: logistic regression of onset on the listed covariates
model <- glm(
  onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate +
    instab + polity2l + ethfrac + relfrac,
  data = data,
  family = "binomial"
)
@thomasjensen
thomasjensen / applyexample.r
Created December 13, 2011 16:49
How to feed a function to the apply function
data <- cbind(rnorm(100), rnorm(100), rnorm(100))

# Locate the element of a numeric vector that lies furthest from its mean.
#
# Args:
#   xrow: a numeric vector (for example one row handed over by apply()).
#
# Returns:
#   A vector of two values: the position of the element with the largest
#   absolute deviation from the mean, followed by that deviation.
outlier <- function(xrow) {
  deviations <- abs(xrow - mean(xrow))
  furthest <- which.max(deviations)
  c(furthest, deviations[furthest])
}
@thomasjensen
thomasjensen / tikz.tex
Created December 7, 2011 11:00
Include output from tikzDevice in LaTeX
\documentclass{article}
\usepackage{tikz}
\begin{document}
\begin{figure}[ht]
\input{test.tex}
\caption{Sample output from tikzDevice}
\end{figure}