Skip to content

Instantly share code, notes, and snippets.

@felixhaass
Last active January 8, 2018 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save felixhaass/5766725 to your computer and use it in GitHub Desktop.
Save felixhaass/5766725 to your computer and use it in GitHub Desktop.
library(ggplot2)
library(Cairo)
library(scales)
library(RCurl)
library(rjson)
#################################
# Data preparation from web API #
#################################
# get raw drone data
drones <- fromJSON(getURL("http://api.dronestre.am/data"))
# The following code prepares three different data frames:
# (1) drone_data => raw drone data as data frame & exported as .csv
# (2) drone_sum => a sum of drone strikes per month & and monthly fatalities
# (3) drone_sum_month => time series of all drone strikes per month, including
# months in which nothing happened
## (1) Create "drone_data" and "Drone Data.csv"
# create empty data frame
drone_data <- data.frame(t(rep(NA,length(drones$strike[[1]]))), stringsAsFactors=FALSE)
names(drone_data) <- names(drones$strike[[1]])
for (i in 1:length(drones$strike)) {
# clean up empty/useless JSON entries and fill with 0
drones$strike[[i]][which(drones$strike[[i]]=="" | drones$strike[[i]]=="?")] <- 0
drones$strike[[i]][which(drones$strike[[i]]=="list()")] <- 0
# fill data frame
drone_data[i, ] <- drones$strike[[i]]
}
# convert dates
drone_data$date <- as.Date(drone_data$date, "%Y-%m-%d")
write.table(drone_data, "./output/Drone_Data.csv", sep="\t", col.names=TRUE,
row.names=FALSE, quote=FALSE, qmethod="double", eol="\n", dec = ".")
## (2) create drone_sum
# aggregate drone strikes per month
drone_data$dummy <- 1
# fix data issues
drone_data <- drone_data[-336, ] # wrong/useless data entry
# March 2014 dates are currently (20 March 2014) wrong,
# you should check if that applies in the future
drone_data[drone_data$date > as.Date("2014-04-01"), "date"] <- c(rep("2014-03-01", 4))
drone_sum <- data.frame(CountMonth=rowsum(drone_data$dummy, format(drone_data$date,"%Y-%m")),
Dates=levels(factor(format(drone_data$date,"%Y-%m"))),
MonthFat=rowsum(as.integer(drone_data$deaths_min), format(drone_data$date,"%Y-%m"), na.rm=T),
MonthFatMax=rowsum(as.integer(drone_data$deaths_max), format(drone_data$date,"%Y-%m"), na.rm=T),
stringsAsFactors=F)
# convert dates in summary file for better plotting
drone_sum$Dates <- as.Date(paste(drone_sum$Dates, "-1", sep=""))
## (3) create drone_sum_month
# length.out argument is quite clumsy to get the correct number of months
months <- seq(as.Date("2002/11/1"),
by = "month",
length.out = as.numeric(round(difftime(Sys.Date(), as.Date("2002-11-1"), units=c("days"))/30.416)))
drone_sum_month <- data.frame(Dates=months)
drone_sum_month <- merge(drone_sum, drone_sum_month, by="Dates")
#########################
# Change point analysis #
#########################
library(changepoint)
library(fields) # for xline
library(Cairo)
library(car)
# calculate changepoints
for(i in 20:1) {
CairoPNG(file=paste0("./figs/changepoint", i,".png"), pointsize=11, width=1280, height=522 )
mean.drone <- cpt.mean(drone_sum_month$CountMonth, method='BinSeg', Q=i)
par(mar=c(6, 4, 4, 2))
plot(mean.drone,type='l',cpt.col='red',xlab='',ylab='Frequency of US drone strikes',cpt.width=2, main="Changepoints in US drone strike activity", xaxt="n")
axis(1, at=cpts(mean.drone), labels=format.Date(drone_sum_month$Dates[cpts(mean.drone)], "%b-%y "), las=3)
xline(which.names("2009-01-01", drone_sum_month$Dates), lty=2, col="blue3", lwd=2) # 1st Obama presidency
xline(which.names("2013-01-01", drone_sum_month$Dates), lty=3, col="blue3", lwd=2) # 2nd Obama presidency
xline(which.names("2013-06-01", drone_sum_month$Dates), lty=4, col="chartreuse4", lwd=2) # promise to reduce drone strikes
dev.off()
}
# Create single plots
CairoPNG(file=paste0("./figs/changepoint_5.png"), pointsize=11, width=1280, height=522 )
mean.drone <- cpt.mean(drone_sum_month$CountMonth, method='BinSeg', Q=5)
par(mar=c(6, 4, 4, 2))
plot(mean.drone,type='l',cpt.col='red',xlab='',ylab='Frequency of US drone strikes',cpt.width=2, main="Changepoints in US drone strike activity", xaxt="n")
axis(1, at=cpts(mean.drone), labels=format.Date(drone_sum_month$Dates[cpts(mean.drone)], "%b-%y "), las=3)
xline(which.names("2009-01-01", drone_sum_month$Dates), lty=2, col="blue3", lwd=2) # 1st Obama presidency
xline(which.names("2013-01-01", drone_sum_month$Dates), lty=3, col="blue3", lwd=2) # 2nd Obama presidency
xline(which.names("2013-06-01", drone_sum_month$Dates), lty=4, col="chartreuse4", lwd=2) # promise to reduce drone strikes
dev.off()
#########
# Plots #
#########
# reassign date-class for correct order in ggplot2
drone_sum_month$Dates <- as.POSIXct(drone_sum_month$Dates)
#plot number of drone strikes per month, using Cairo device
CairoPNG(file="./figs/strikes_per_month.png", width=1280, height=522 )
a <- ggplot(drone_sum_month, aes(Dates, CountMonth))
a + geom_line(stat="identity") +
labs(title="Frequency of US Drone Strikes (Monthly), 2002-2013\n", y="Frequency", x="")+
theme_bw() +
theme(axis.text.x=element_text(hjust=1.1, angle=45), legend.key=element_blank()) +
# the vertical line represents the start of the Obama presidency
geom_vline(xintercept=as.numeric(as.POSIXct("2009-01-01")), linetype ="longdash") +
scale_x_datetime(breaks="6 months", labels = date_format("%Y %B"))
dev.off()
#plot drones strike fatalities per month (low and high estimate)
CairoPNG(file="./figs/fat_per_month.png", width=1280, height=522)
b <- ggplot(drone_sum_month, aes(x=Dates))
b +
geom_line(aes(y=MonthFat, colour = "Low Estimate")) +
geom_line(aes(y=MonthFatMax, colour ="High Estimate")) +
scale_colour_manual("",
breaks = c("Low Estimate", "High Estimate"),
values = c("red", "black")) +
labs(title="Monthly Fatalities by US Drone Strikes, 2002-2013\n", y="Monthly Fatalities", x="")+
theme_bw() +
theme(axis.text.x=element_text(hjust=1.1, angle=45), legend.key=element_blank()) +
# the vertical line represents the start of the Obama presidency
geom_vline(xintercept=as.numeric(as.POSIXct("2009-01-01")), linetype = "longdash") +
scale_x_datetime(breaks="6 months", labels = date_format("%Y %B"))
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment