Skip to content

Instantly share code, notes, and snippets.

@thomasjensen
thomasjensen / extract.py
Created January 5, 2012 18:27
extract information from files downloaded with download.py
from BeautifulSoup import BeautifulSoup
import os
import re
# Directory that holds the downloaded HTML pages.
path = "/Users/thomasjensen/Documents/RBloggersScrape/download"
# Keep only the directory entries whose name contains "post<digits>.html".
# re.search returns None when there is no match, so test identity with
# "is not None" rather than "!= None".
listing = [
    name
    for name in os.listdir(path)
    if re.search(r"post\d+\.html", name) is not None
]
# Work from inside the download directory so the bare names in `listing`
# resolve as relative paths.
os.chdir(path)
@thomasjensen
thomasjensen / rbloggerAnalysis.r
Created January 6, 2012 21:13
analysing the data scraped from r-bloggers.com
#load the libraries
library(plyr)
library(ggplot2)
library(xtable)
#set the working directory to where you saved the output.csv file from the previous post
#NOTE(review): "/.../" looks like a placeholder path - replace it before running
setwd("/.../")
#read the data (output.csv is resolved relative to the working directory set above)
data <- read.csv("output.csv")
@thomasjensen
thomasjensen / textmining.r
Created January 26, 2012 14:40
text mining of Politikken
##load the libraries and set the working directory
library(tm)
library(corrplot)
setwd("/path/to/")
##read in the data and subset it to the relevant categories
data <- read.csv("indvandringPolitikken.csv", fileEncoding = "latin1")
##%in% never yields NA, so rows with a missing kategori are dropped instead of
##turning into all-NA rows as they would with chained == comparisons
data <- data[data$kategori %in% c("Politik", "Debat", "Kronikken", "Leder"), ]
##create the corpus and clean it
@thomasjensen
thomasjensen / ebtrust.r
Created February 8, 2012 22:14
eurobarometer survey
library(ggplot2)
# Move to the folder holding the survey export and load it.
setwd("/path/to/file/")
data <- read.csv("ebAll.csv")

# Parse the date column; the format is two-digit year / month / day.
data$date <- as.Date(data$date, format = "%y/%m/%d")

# Strip any "%" from the three answer-share columns and convert them to
# numeric in one pass.
pct_cols <- c("Tend.to.trust", "Tend.not.to.trust", "DK...Don.t.know")
data[pct_cols] <- lapply(
  data[pct_cols],
  function(share) as.numeric(gsub("%", "", share))
)
@thomasjensen
thomasjensen / simconf.r
Created December 19, 2011 23:40
Simulate fake data to assess model fit
#set the working directory and load the foreign library (provides read.dta)
setwd("/.../")
library(foreign)
#read the data and remove missing values of the dependent variable:
#rows where onset is coded 4 and rows where onset is NA. The explicit is.na()
#check matters because NA != 4 evaluates to NA, and indexing a data frame
#with NA yields an all-NA row instead of dropping the observation.
data <- read.dta("repdata.dta")
data <- data[!is.na(data$onset) & data$onset != 4, ]
#estimate the model (binomial glm of onset on the listed covariates)
model <- glm(
  onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate +
    instab + polity2l + ethfrac + relfrac,
  data = data,
  family = "binomial"
)