Last active
August 29, 2015 13:57
-
-
Save franvillamil/9546627 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Working directory: "..." is a placeholder and must be replaced with the
# project directory (the one that contains the "data" folder read below).
setwd("...")
library(ggplot2)

## DATA: Ron Francisco's European Protest and Coercion Data.
## Downloaded (.xls files) from http://web.ku.edu/~ronfrand/data/ and converted into .csv with MS Excel.
# Defining main dataframe and specifying country codes.
# `data` starts empty and is filled with one chunk of rows per CSV file;
# each entry of `countrycodes` is "<Country><yy>-<yy>", i.e. the base name
# (without ".csv") of one input file.
data <- data.frame()
countrycodes <- c(
  "Albania80-88", "Albania89-95", "Austria80-95", "Belgium80-87", "Belgium88-95",
  "Bulgaria80-89", "Bulgaria90-95", "Cyprus80-95", "Denmark80-86", "Finland80-95",
  "France80-83", "France84-86", "France87-89", "France90-92", "France93-95",
  "FRG80-83", "FRG84-86", "FRG87-89", "FRG90-92", "FRG93-95", "Greece80-95",
  "Iceland80-95", "Ireland80-83", "Ireland84-86", "Ireland87-89",
  "Ireland90-92", "Ireland93-95", "Italy80-83", "Italy84-87", "Italy88-91",
  "Italy92-95", "Luxembourg80-95", "Netherlands80-95", "NorthernIreland80-83",
  "NorthernIreland84-86", "NorthernIreland87-89", "NorthernIreland90-92",
  "NorthernIreland93-95", "Norway80-95", "Poland80-81", "Poland82-83", "Poland84-85",
  "Poland86-87", "Poland88-89", "Poland90-92", "Poland93-95", "Portugal80-87",
  "Portugal88-95", "Romania80-95", "Spain80-83", "Spain84-86", "Spain87-89",
  "Spain90-92", "Spain93-95", "Sweden80-95", "Switzerland80-95", "UK80-83",
  "UK84-86", "UK87-89", "UK90-92", "UK93-95"
)
### MISSING: Czechoslovakia, GDR & Hungary (n=24!) (remove all communist countries?)
# Reading data from country CSVs
# (NOTE: data files must be in "data" folder inside working directory)
#
# For every file: keep the date and action columns, collapse the action
# labels into demonstration / strike / other, count events per month and
# action, keep only demonstrations and strikes, tag the rows with the
# country name and append them to `data`.
for (j in seq_along(countrycodes)) {
  # Getting the file name and reading the CSV.
  # stringsAsFactors is set explicitly so the recoding below behaves the
  # same on every R version (the default changed from TRUE to FALSE in 4.0).
  filename <- paste0("data/", countrycodes[j], ".csv")
  i <- read.csv(file = filename, header = TRUE, stringsAsFactors = FALSE)

  # Removing all columns but date and action type (1st and 3rd columns)
  i <- i[, c(1, 3)]
  names(i) <- c("date", "action")

  # Changing action names: demonstrations, strikes, and others.
  # Done on plain character values; the previous levels()-based recoding
  # is a silent no-op when the column is not a factor.
  action <- as.character(i$action)
  action[action %in% "demonstrations"] <- "demonstration"
  action[action %in% c("general strike", "strike ")] <- "strike"
  action[!is.na(action) & !(action %in% c("demonstration", "strike"))] <- "other"
  i$action <- action

  # Getting the month from date column (output: chr variable).
  # NOTE(review): "%b" matches abbreviated month names in the current
  # locale, so this presumably needs an English LC_TIME -- confirm.
  i$date <- format(as.Date(i$date, "%d-%b-%y"), "%Y-%m")

  # Getting monthly frequencies (columns come back as Var1, Var2, Freq)
  i <- as.data.frame(table(i$date, i$action))

  # Removing "other" events
  i <- i[i$Var2 %in% c("demonstration", "strike"), ]

  # Changing variables names
  names(i)[1:3] <- c("date", "action", "freq")

  # Getting country name by dropping the trailing "yy-yy" period from the
  # country code, and adding it as a fourth column
  countryname <- sub("[0-9]{2}-[0-9]{2}$", "", countrycodes[j])
  i$country <- rep(countryname, nrow(i))

  # Add to main dataframe
  data <- rbind(data, i)
}
# Removing 2 cases wrongly coded as 2066.
# A logical filter is used instead of data[-which(...), ]: when nothing
# matches, -which() yields an empty index and would drop EVERY row.
data <- data[data$date != "2066-02", ]

# Cleaning workspace and removing "other" level from action variable
# (re-creating the factor drops levels that no longer occur)
rm(countrycodes, j, i, filename, countryname)
data$action <- factor(data$action)

# Turning date variable ("YYYY-MM") into Date class for plotting, using the
# first day of each month
data$date <- as.character(data$date)
data$date <- as.Date(paste0("01-", data$date), format = "%d-%Y-%m")
# Plot protest data through time, separating strikes from demonstrations.
# One stacked bar per month (rows of all countries are summed by stacking),
# one facet per action type with its own y scale.
# - geom_col() draws the pre-computed frequencies directly; it replaces
#   geom_histogram(stat = "identity", binwidth = 1), where binwidth was
#   ignored anyway.
# - date_breaks / date_minor_breaks / date_labels are ggplot2's own
#   arguments, so date_format() (from the never-attached scales package)
#   is no longer needed.
protest.plot <- ggplot(data, aes(x = date, y = freq)) +
  geom_col() +
  scale_x_date(date_breaks = "3 months",
               date_minor_breaks = "1 month",
               date_labels = "%b %Y",
               limits = c(as.Date("1980-07-01"), as.Date("1995-04-01"))) +
  theme(axis.text.x = element_text(angle = 90),
        plot.title = element_text(face = "bold")) +
  ylab("") + xlab("") +
  ggtitle(paste0(
    "MONTHLY NUMBER OF DEMONSTRATIONS AND STRIKES IN EUROPE, 1980-1995\n\n",
    "(Albania, Austria, Belgium, Bulgaria, Cyprus, Denmark, Finland, France, FR Germany, Greece, Iceland, Ireland,\n",
    "Italy, Luxembourg, Netherlands, N. Ireland, Norway, Poland, Portugal, Romania, Spain, Sweden, Switzerland, UK)\n"
  )) +
  facet_wrap(~ action, ncol = 1, scales = "free_y")
# Plot dependent variable (monthly freq of demonstrations/strikes).
# Histogram of the per-country, per-month, per-action counts with one bin
# per integer value; rows with a frequency above 100 are left out of the
# plot so the bulk of the distribution stays readable.
DV.plot <- ggplot(data[data$freq < 101, ], aes(x = freq)) +
  geom_histogram(binwidth = 1) +
  ggtitle("Dependent variable - freq distribution\n(Removed 21 cases above 100 from plot)") +
  xlab("Monthly frequency of demonstrations and strikes in each country") +
  ylab("")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment