Skip to content

Instantly share code, notes, and snippets.

@ulfelder
Last active May 31, 2016 11:59
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ulfelder/5505468 to your computer and use it in GitHub Desktop.
R code for selecting events in GDELT files that look like possible atrocities (i.e., lethal violence against civilians), summing them by location, and mapping them.
# Housekeeping: clear workspace and load packages
# NOTE: rm(list = ls(all = TRUE)) deletes every object in the current
# environment, including names that start with a dot, so anything defined
# before this script is run or sourced there is lost.
rm(list=ls(all=TRUE))
# Packages are attached in the order listed; when two packages export the same
# name, the one attached later masks the earlier one.
# NOTE(review): map.where() is called further down but no library(maps) call
# appears here -- presumably it is attached as a dependency of one of these
# packages; confirm.
library(lubridate)
library(RgoogleMaps)
# get_map() and ggmap() used for the basemaps
library(ggmap)
library(mapproj)
# geom_point() and aes() used for the event overlays
library(ggplot2)
# ddply(), .() and summarize used for the grouped sums
library(plyr)
# Load 2011 GDELT data from http://eventdata.psu.edu/data.dir/GDELT.html
# Reads the tab-delimited file '2011.reduced.txt' (first line = column names)
# from the current working directory. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
global.2011 <- read.delim("2011.reduced.txt", header = TRUE)
# Give every event a weight of 1 (stored as a double) so that the later
# sum(count) calls yield the number of events per group
global.2011$count <- 1
# Split Day variable into separate year, month, day vars for later summing
# splitdate: append numeric year, month and day columns to a data frame.
#   df      data frame with a Day column whose character form is laid out as
#           YYYYMMDD (characters 1-4, 5-6 and 7-8 are extracted)
#   returns df with three extra columns named year, month and day
splitdate <- function(df) {
  stamp <- as.character(df$Day)
  # Pull one fixed-width field out of the date stamp and make it numeric
  field <- function(first, last) as.numeric(substr(stamp, first, last))
  cbind(df, year = field(1, 4), month = field(5, 6), day = field(7, 8))
}
global.2011 <- splitdate(global.2011)
# Make the Actor1 coordinates numeric for mapping, etc. The conversion goes
# through character so that a factor column yields the numbers written in its
# labels rather than its internal level codes.
global.2011[c("Actor1Geo_Lat", "Actor1Geo_Long")] <- lapply(
  global.2011[c("Actor1Geo_Lat", "Actor1Geo_Long")],
  function(coord) as.numeric(as.character(coord))
)
# Filtering functions for possible atrocities w/state & non-state perpetrators
# where 'df' is data frame from GDELT reduced file
# atr.state: keep the rows of df for which ALL of the following hold
#   * EventCode is one of the event codes in violent.events below;
#   * characters 1-3 or 4-6 of Actor2Code (the target) are one of the
#     target.roles codes below;
#   * characters 1-3 or 4-6 of Actor1Code (the source) are one of the
#     state.roles codes below.
# Rows with a missing code never match. All columns and the original row
# names are kept. Stops with an error if a required column is absent.
#
# Plain logical indexing is used instead of subset(): subset() evaluates its
# condition non-standardly, so a column missing from df could silently be
# replaced by a same-named variable from the calling environment.
atr.state <- function(df) {
  needed <- c("EventCode", "Actor1Code", "Actor2Code")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop("df is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  violent.events <- c(180, 1823, 190, 193, 194, 195,
                      200, 201, 202, 203, 204, 205)
  target.roles <- c("CVL", "OPP", "EDU", "LAB", "REL", "HLH", "REF", "MED")
  state.roles <- c("GOV", "MIL", "COP", "SPY")

  # TRUE where the first or second three-character chunk of codes is in roles
  has.role <- function(codes, roles) {
    codes <- as.character(codes)
    substr(codes, 1, 3) %in% roles | substr(codes, 4, 6) %in% roles
  }

  # %in% never returns NA, so keep is a clean TRUE/FALSE mask
  keep <- df[["EventCode"]] %in% violent.events &
    has.role(df[["Actor2Code"]], target.roles) &
    has.role(df[["Actor1Code"]], state.roles)
  df[keep, , drop = FALSE]
}
# atr.rebel: keep the rows of df for which ALL of the following hold
#   * EventCode is one of the event codes in violent.events below;
#   * characters 1-3 or 4-6 of Actor2Code (the target) are one of the
#     target.roles codes below;
#   * characters 1-3 or 4-6 of Actor1Code (the source) are "REB" or "SEP".
# Rows with a missing code never match. All columns and the original row
# names are kept. Stops with an error if a required column is absent.
#
# Plain logical indexing is used instead of subset(): subset() evaluates its
# condition non-standardly, so a column missing from df could silently be
# replaced by a same-named variable from the calling environment.
atr.rebel <- function(df) {
  needed <- c("EventCode", "Actor1Code", "Actor2Code")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop("df is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  violent.events <- c(180, 1823, 190, 193, 194, 195,
                      200, 201, 202, 203, 204, 205)
  target.roles <- c("CVL", "OPP", "EDU", "LAB", "REL", "HLH", "REF", "MED")
  rebel.roles <- c("REB", "SEP")

  # TRUE where the first or second three-character chunk of codes is in roles
  has.role <- function(codes, roles) {
    codes <- as.character(codes)
    substr(codes, 1, 3) %in% roles | substr(codes, 4, 6) %in% roles
  }

  # %in% never returns NA, so keep is a clean TRUE/FALSE mask
  keep <- df[["EventCode"]] %in% violent.events &
    has.role(df[["Actor2Code"]], target.roles) &
    has.role(df[["Actor1Code"]], rebel.roles)
  df[keep, , drop = FALSE]
}
# Apply filtering functions to 2011 data
state.2011 <- atr.state(global.2011)
rebel.2011 <- atr.rebel(global.2011)
# Sum by source/target/event combo, sort in descending order, and inspect top 30
state.triad.2011 <- ddply(state.2011, .(Actor1Code, Actor2Code, EventCode), summarize, count = sum(count))
rebel.triad.2011 <- ddply(rebel.2011, .(Actor1Code, Actor2Code, EventCode), summarize, count = sum(count))
state.triad.2011 <- state.triad.2011[order(-state.triad.2011$count), ]
rebel.triad.2011 <- rebel.triad.2011[order(-rebel.triad.2011$count), ]
# head() instead of [1:30, ]: with fewer than 30 rows, [1:30, ] pads the
# printout with all-NA rows, whereas head() simply prints what is there
print(head(state.triad.2011, 30))
print(head(rebel.triad.2011, 30))
# Sum by geocoded location for mapping, locate countries, inspect top 30
state.loc.2011 <- ddply(state.2011, .(Actor1Geo_Lat, Actor1Geo_Long), summarize, count = sum(count))
rebel.loc.2011 <- ddply(rebel.2011, .(Actor1Geo_Lat, Actor1Geo_Long), summarize, count = sum(count))
# add.where: return loc with a character column 'where' holding the result of
# map.where() on the "world" database for each row's longitude/latitude.
# Row 1 is left as NA, exactly as the earlier 2:length(...) indexing did.
# NOTE(review): the reason for skipping row 1 is not visible here -- confirm.
# seq_len(n)[-1] is used because 2:n counts backwards (2, 1) when n < 2, which
# made the assignment fail or misalign for tables with fewer than two rows.
add.where <- function(loc) {
  n <- nrow(loc)
  loc$where <- rep(NA_character_, n)
  rest <- seq_len(n)[-1]
  if (length(rest) > 0) {
    loc$where[rest] <- map.where(database = "world",
                                 loc$Actor1Geo_Long[rest],
                                 loc$Actor1Geo_Lat[rest])
  }
  loc
}
state.loc.2011 <- add.where(state.loc.2011)
rebel.loc.2011 <- add.where(rebel.loc.2011)
# head() instead of [1:30, ] so short tables are not padded with all-NA rows
print(head(state.loc.2011, 30))
print(head(rebel.loc.2011, 30))
# Overlay shared by every map below: one jittered, semi-transparent bubble per
# location, sized by event count. Rebel locations (blue2) are drawn first,
# state locations (red2) on top. Returns a list of two layers that can be
# added to a ggmap plot with `+`.
atrocity.layers <- function() {
  bubbles <- function(loc, fill) {
    geom_point(data = loc, position = "jitter",
               aes(x = Actor1Geo_Long, y = Actor1Geo_Lat, size = count),
               alpha = 0.5, pch = 21, colour = "black", fill = fill)
  }
  list(bubbles(rebel.loc.2011, "blue2"), bubbles(state.loc.2011, "red2"))
}

# Map: Africa
map <- get_map(location = c(lon = 18.35, lat = 4.22), zoom = 3,
               source = "google", maptype = "terrain", color = "bw")
print(ggmap(map) + atrocity.layers())

# Map: Asia and Middle East
map <- get_map(location = c(lon = 77.12, lat = 28.36), zoom = 3,
               source = "google", maptype = "terrain", color = "bw")
print(ggmap(map) + atrocity.layers())

# Map: Syria (centred on the midpoint of the lat/lon ranges below)
lat <- c(32, 38)
lon <- c(35, 43)
map <- get_map(location = c(lon = mean(lon), lat = mean(lat)), zoom = 7,
               source = "google", maptype = "terrain", color = "bw")
print(ggmap(map) + atrocity.layers())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment