Skip to content

Instantly share code, notes, and snippets.

@franvillamil
Last active August 29, 2015 13:57
Show Gist options
  • Save franvillamil/9546627 to your computer and use it in GitHub Desktop.
Save franvillamil/9546627 to your computer and use it in GitHub Desktop.
setwd("...")
library(ggplot2)
## DATA: Ron Francisco's European Protest and Coercion Data.
## Downloaded (.xls files) from http://web.ku.edu/~ronfrand/data/ and converted into .csv with MS Excel.
# Main (initially empty) data frame that the per-country monthly counts are
# appended to, and the list of country/period codes. Each code is the base
# name of one CSV file: "<Country><first year>-<last year>", two-digit years.
data <- data.frame(date = NULL, action = NULL, freq = NULL, country = NULL)
countrycodes <- c(
  "Albania80-88", "Albania89-95",
  "Austria80-95",
  "Belgium80-87", "Belgium88-95",
  "Bulgaria80-89", "Bulgaria90-95",
  "Cyprus80-95",
  "Denmark80-86",
  "Finland80-95",
  "France80-83", "France84-86", "France87-89", "France90-92", "France93-95",
  "FRG80-83", "FRG84-86", "FRG87-89", "FRG90-92", "FRG93-95",
  "Greece80-95",
  "Iceland80-95",
  "Ireland80-83", "Ireland84-86", "Ireland87-89", "Ireland90-92",
  "Ireland93-95",
  "Italy80-83", "Italy84-87", "Italy88-91", "Italy92-95",
  "Luxembourg80-95",
  "Netherlands80-95",
  "NorthernIreland80-83", "NorthernIreland84-86", "NorthernIreland87-89",
  "NorthernIreland90-92", "NorthernIreland93-95",
  "Norway80-95",
  "Poland80-81", "Poland82-83", "Poland84-85", "Poland86-87", "Poland88-89",
  "Poland90-92", "Poland93-95",
  "Portugal80-87", "Portugal88-95",
  "Romania80-95",
  "Spain80-83", "Spain84-86", "Spain87-89", "Spain90-92", "Spain93-95",
  "Sweden80-95",
  "Switzerland80-95",
  "UK80-83", "UK84-86", "UK87-89", "UK90-92", "UK93-95"
)
### MISSING: Czechoslovakia, GDR & Hungary (n=24!) (remove all communist countries?)
# Reading data from country CSVs
# (NOTE: data files must be in "data" folder inside working directory)
#
# For every code in `countrycodes`: read data/<code>.csv, keep the date and
# action-type columns, collapse the action labels into
# demonstration / strike / other, count events per month and action, drop the
# "other" counts, tag the rows with the country name, and finally append
# everything to `data` (columns: date, action, freq, country).
#
# The loop variables (j, i, filename, countryname) are deliberately left in
# the workspace; the clean-up step further down removes them by name.
monthly <- vector("list", length(countrycodes))
for (j in seq_along(countrycodes)) {
  # Getting the file name and reading the CSV. Strings are read as plain
  # character vectors so the recoding below behaves the same on every R
  # version (the stringsAsFactors default changed to FALSE in R 4.0).
  filename <- paste0("data/", countrycodes[j], ".csv")
  i <- read.csv(file = filename, header = TRUE, stringsAsFactors = FALSE)

  # Removing all columns but date (1st column) and action type (3rd column)
  i <- i[, c(1, 3)]
  names(i) <- c("date", "action")

  # Changing action names: demonstrations, strikes, and others.
  # %in% is used instead of == so that missing values never match.
  i$action <- as.character(i$action)
  i$action[i$action %in% "demonstrations"] <- "demonstration"
  i$action[i$action %in% c("general strike", "strike ")] <- "strike"
  i$action[!is.na(i$action) &
             !(i$action %in% c("demonstration", "strike"))] <- "other"

  # Getting the month from date column (output: chr variable, "YYYY-MM").
  # NOTE: %b parses month abbreviations in the current locale and %y expands
  # two-digit years 00-68 to 20xx; unparseable dates become NA and are
  # dropped by table() below.
  i$date <- format(as.Date(i$date, format = "%d-%b-%y"), "%Y-%m")

  # Getting monthly frequencies (one row per month x action combination)
  i <- as.data.frame(table(i$date, i$action))

  # Removing "other" events
  i <- i[i$Var2 %in% c("demonstration", "strike"), ]

  # Changing variables names
  names(i)[1:3] <- c("date", "action", "freq")

  # Getting country name by stripping the trailing "YY-YY" period from the
  # code, and adding it as a fourth column
  countryname <- sub("[0-9]{2}-[0-9]{2}$", "", countrycodes[j])
  i$country <- rep(countryname, nrow(i))

  # Collect the piece; everything is bound together once after the loop
  # instead of growing `data` with rbind() on every iteration.
  monthly[[j]] <- i
}
# Add to main dataframe
data <- do.call(rbind, c(list(data), monthly))
rm(monthly)
# Removing the 2 cases miscoded as February 2066 (the files only cover
# 1980-95; presumably a two-digit year "66" in the raw data, which the %y
# date format used when reading the files expands to 2066).
# A logical mask is used instead of -which(): with no matching rows,
# -which(...) is an empty index and would drop EVERY row of the data frame.
data <- data[!(data$date %in% "2066-02"), ]
# Cleaning workspace and removing "other" level from action variable
# (factor() re-creates the factor with only the levels still in use)
rm(countrycodes, j, i, filename, countryname)
data$action <- factor(data$action)
# Turning date variable into date class for plotting: each "YYYY-MM" month
# is represented by its first day
data$date <- as.Date(paste0(as.character(data$date), "-01"),
                     format = "%Y-%m-%d")
# Plot protest data through time, separating strikes from demonstrations.
# One bar per month; rows from different countries that share a month are
# stacked, so bar height is the total across all countries. Each action type
# gets its own panel with an independent y axis.
#
# The date axis is configured through ggplot2's own date_breaks /
# date_minor_breaks / date_labels arguments: date_format() belongs to the
# scales package, which this script never attaches. geom_bar(stat =
# "identity") replaces geom_histogram(stat = "identity", binwidth = 1),
# since no binning takes place and binwidth has no effect there.
# NOTE(review): the x limits crop the series to July 1980 - April 1995 even
# though the files are labelled 80-95 -- confirm this is intended.
protest.plot <- ggplot(data, aes(x = date, y = freq)) +
  geom_bar(stat = "identity") +
  scale_x_date(date_breaks = "3 months",
               date_minor_breaks = "1 month",
               date_labels = "%b %Y",
               limits = c(as.Date("1980-07-01"), as.Date("1995-04-01"))) +
  theme(axis.text.x = element_text(angle = 90),
        plot.title = element_text(face = "bold")) +
  ylab("") + xlab("") +
  ggtitle("MONTHLY NUMBER OF DEMONSTRATIONS AND STRIKES IN EUROPE, 1980-1995\n
(Albania, Austria, Belgium, Bulgaria, Cyprus, Denmark, Finland, France, FR Germany, Greece, Iceland, Ireland,
Italy, Luxembourg, Netherlands, N. Ireland, Norway, Poland, Portugal, Romania, Spain, Sweden, Switzerland, UK)\n") +
  facet_wrap(~ action, ncol = 1, scales = "free_y")
# Distribution of the dependent variable: histogram (bin width 1) of the
# monthly per-country frequency of demonstrations/strikes. Only rows with a
# frequency below 101 are plotted, as stated in the title.
DV.plot <- ggplot(data[data$freq < 101, ], aes(x = freq)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Dependent variable - freq distribution\n(Removed 21 cases above 100 from plot)",
    x = "Monthly frequency of demonstrations and strikes in each country",
    y = ""
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment