Last active
July 3, 2023 10:57
-
-
Save jmrphy/9020745 to your computer and use it in GitHub Desktop.
Eurobarometer 1970-2002 starter script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Starter script for analyzing the Eurobarometer Trend data (1970-2002).
### Important: For this script to work, first set your working directory
### to a directory containing the Eurobarometer Trend dataset,
### named "eurobarometer_trends.dta". You can obtain it from their website or download
### the data file here: https://www.dropbox.com/s/5bdhel8l7c5r59z/eurobarometer_trends.dta?dl=0
### Outline:
### 1. Identify variables of interest and clean them up
### 2. Estimate a regression model of what makes people turn out for EP elections
### Load the packages we're going to use
### (uncomment the install.packages() lines if a package is not installed yet)
# install.packages("ggplot2")
# install.packages("reshape2")
# install.packages("foreign")
library(ggplot2) | |
library(reshape2) | |
library(foreign) | |
### Read the Stata file from the current working directory and
### name the resulting dataframe "data"
data <- read.dta(file = "eurobarometer_trends.dta")
### Variable "mediause" asks how much the respondent relies on the media.
summary(data$mediause)  # quick look at the raw answer categories
# Treat the non-substantive answers ("dk", "na", "inap") as missing (NA)
data$mediause[data$mediause %in% c("dk", "na", "inap")] <- NA
# Rebuild the factor: unused levels are dropped and the four remaining
# levels are relabelled, in their existing order, with clean names
data$mediause <- factor(
  data$mediause,
  labels = c("Very high", "High", "Low", "Very low")
)
# Numeric version of the scale (cheating a bit!): the factor codes 1-4 are
# flipped to 3-0, so a higher number means heavier reliance on the media
data$medianum <- 4 - as.numeric(data$mediause)
### Variable "particip" asks respondent how likely they are to vote in the EP elections.
summary(data$particip)  # raw answer categories
# "DK,NA", "depends" and "inap" carry no usable answer: set them to missing (NA)
data$particip[data$particip %in% c("DK,NA", "depends", "inap")] <- NA
# Drop the now-unused levels and relabel the four remaining ones in order
data$particip <- factor(
  data$particip,
  labels = c("Certainly yes", "Probably yes", "Probably not", "Certainly not")
)
# Numeric version of the scale (cheating a bit!): the factor codes 1-4 are
# flipped to 3-0, so a higher number means more likely to vote
data$participnum <- 4 - as.numeric(data$particip)
summary(data$participnum)
### Variable "income" asks the respondent's income level.
summary(data$income)  # inspected only; no recoding is applied to income here
### Variable "polint" asks the respondent's political interest generally.
summary(data$polint)  # raw answer categories
# "DK,NA" and "inap" carry no usable answer: set them to missing (NA)
data$polint[data$polint %in% c("DK,NA", "inap")] <- NA
# Drop the now-unused levels and relabel the four remaining ones in order
data$polint <- factor(
  data$polint,
  labels = c("A great deal", "To some extent", "Not much", "Not at all")
)
# Numeric version of the scale (cheating a bit!): the factor codes 1-4 are
# flipped to 3-0, so a higher number means more political interest
data$polintnum <- 4 - as.numeric(data$polint)
summary(data$polintnum)
### Variable "ecint3" asks the respondent's interest in EU politics
### (three answer categories).
summary(data$ecint3)  # raw answer categories
# "DK, NA" and "inap" carry no usable answer: set them to missing (NA)
data$ecint3[data$ecint3 %in% c("DK, NA", "inap")] <- NA
# Drop the now-unused levels and relabel the three remaining ones in order
data$ecint3 <- factor(
  data$ecint3,
  labels = c("Very interested", "A little", "Not at all")
)
# Numeric version of the scale (cheating a bit!): the factor codes 1-3 are
# subtracted from 3, giving 2-0, so a higher number means more interest
data$ecint3num <- 3 - as.numeric(data$ecint3)
summary(data$ecint3num)
### Variable "ecint4" asks the respondent's interest in EU politics
### (four answer categories).
summary(data$ecint4)  # raw answer categories
# "DK, NA" and "inap" carry no usable answer: set them to missing (NA)
data$ecint4[data$ecint4 %in% c("DK, NA", "inap")] <- NA
# Drop the now-unused levels and relabel the four remaining ones in order
data$ecint4 <- factor(
  data$ecint4,
  labels = c("A great deal", "To some extent", "Not much", "Not at all")
)
# Numeric version of the scale (cheating a bit!): the factor codes 1-4 are
# flipped to 3-0, so a higher number means more interest
data$ecint4num <- 4 - as.numeric(data$ecint4)
summary(data$ecint4num)
### Variable "relimp" asks the respondent's importance given to religion.
summary(data$relimp)  # raw answer categories
# "DK,NA" and "inap" carry no usable answer: set them to missing (NA)
data$relimp[data$relimp %in% c("DK,NA", "inap")] <- NA
# Drop the now-unused levels and relabel the three remaining ones in order
data$relimp <- factor(data$relimp, labels = c("Great", "Some", "Little"))
summary(data$relimp)  # check the cleaned categories
# Numeric version of the scale (cheating a bit!): the factor codes 1-3 are
# subtracted from 3, giving 2-0, so a higher number means religion matters more
data$relimpnum <- 3 - as.numeric(data$relimp)
summary(data$relimpnum)
### Variable "nation1" captures the respondent's nation
summary(data$nation1)
### Make a subset for Great Britain only.
### The country label is upper-cased for the comparison, so the match does not
### depend on how the names are capitalised in the raw file (the column itself
### is only converted to all caps further down, after this subset is taken).
gb <- subset(data, toupper(nation1) == "GREAT BRITAIN")
### Variable "year" captures the year of the survey
summary(data$year)
### Convert country names to all caps for when we plot
### (toupper() returns a character vector; "gb" above keeps the original column)
data$nation1 <- toupper(data$nation1)
### Aggregate the data at the country-year level
# Keep only the religion variable plus the two grouping variables
agg <- data[c("relimpnum", "nation1", "year")]
# Mean importance of religion for each country in each year.
# Columns are referenced through agg$ rather than attach()/detach(), so
# same-named objects in the workspace cannot mask them and the search path
# is left untouched.
agg <- aggregate(
  agg$relimpnum,
  by = list(agg$nation1, agg$year),
  FUN = mean,
  na.rm = TRUE
)
# Melt the data with nation and year as id variables (aggregate() names them
# Group.1 and Group.2); na.rm = TRUE drops country-years without a mean
all.molten <- melt(agg, id.vars = c("Group.1", "Group.2"), na.rm = TRUE)
# Plot: one panel per country (Group.1), with the aggregated value drawn
# as a line over the survey years (Group.2)
ggplot(all.molten, aes(x = Group.2, y = value, colour = variable)) +
  geom_line() +
  facet_wrap(~Group.1) +
  theme(axis.text.x = element_text(angle = 75, hjust = 1))
# Re-plot cleaning things up.
# Portugal and Spain are removed because they have no observations. The
# exclusion uses %in%: `Group.1 != c("PORTUGAL", "SPAIN")` would recycle the
# two names down the rows and compare each row with only one of them.
ggplot(subset(all.molten, !(Group.1 %in% c("PORTUGAL", "SPAIN")))) +
  geom_line(aes(x = Group.2, y = value, colour = variable)) +
  theme(axis.text.x = element_text(angle = 75, hjust = 1)) +
  facet_wrap(~Group.1) +
  labs(
    x = "Year", y = "Importance of Religion",
    title = "Religion in European Countries, 1972-2002"
  ) + # Clean up axes and add a title
  theme(legend.position = "none") # hide legend because there is only one series
### Estimate a regression model of intention to turnout in Great Britain:
### OLS of the numeric turnout-intention scale on income, political interest,
### media use and survey year, fitted on the "gb" subset only.
# NOTE(review): income enters exactly as read from the file (it is only
# summarised above, never recoded) -- confirm that is intended.
model <- lm(
  formula = participnum ~ income + polintnum + medianum + year,
  data = gb
)
summary(model)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See also http://jmrphy.net/blog/2016/12/06/a-quick-start-guide-to-the-statistical-analysis-of-political-attitudes-and-behaviors/