Created
August 18, 2016 17:18
-
-
Save loiyumba/5aaef1ba5b386b83386b4b6d1c43f2a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Fatalities in Terrorist Violence in India's Northeast 1992-2016 ### | |
## Date: 16-Aug-16 | |
# Set working directory
# NOTE(review): this relative, Windows-style path only resolves when the script
# is started from the expected project folder; the CSV files written further
# down land in this directory.
setwd("..\\manipur\\terror\\Data")
# Load the packages
# library() stops right away when a package is missing, whereas require() only
# returns FALSE and lets the script fail later with an unrelated error.
library(rvest)
library(dplyr)
library(ggplot2)
library(tidyr)
# URL link where the data table is available
satpLink <- read_html("http://www.satp.org/satporgtp/countries/india/database/fatalitiesnorteast2006.htm")
# Getting data table from the web page
# Collect the <table> nodes once instead of re-querying the document for
# every single year.
satpTables <- html_nodes(satpLink, "table")
# Convert the table found at `position` on the page into a data frame.
# fill = TRUE pads rows that have fewer cells than the widest row.
readSatpTable <- function(position) {
  html_table(satpTables[[position]], fill = TRUE)
}
# Overall fatality
fatality <- readSatpTable(3)
# Year-wise tables: positions 4 to 15 hold 2016 down to 2005
fatality2016 <- readSatpTable(4)
fatality2015 <- readSatpTable(5)
fatality2014 <- readSatpTable(6)
fatality2013 <- readSatpTable(7)
fatality2012 <- readSatpTable(8)
fatality2011 <- readSatpTable(9)
fatality2010 <- readSatpTable(10)
fatality2009 <- readSatpTable(11)
fatality2008 <- readSatpTable(12)
fatality2007 <- readSatpTable(13)
fatality2006 <- readSatpTable(14)
fatality2005 <- readSatpTable(15)
# Cleaning data
# Overall table: discard rows 1 and 27, then label the five columns
fatalityOverall <- setNames(
  fatality[-c(1, 27), ],
  c("Year", "Civilians", "Forces", "Terrorists", "Total")
)
fatalityOverall[["Year"]] <- as.numeric(fatalityOverall[["Year"]])
fatalityOverall[["Total"]] <- NULL
write.csv(fatalityOverall, "fatalityOverall.csv", row.names = FALSE)
# Rows with a Year inside 2005-2015; which() skips rows whose Year is NA,
# matching what subset() does
fatality2005_2015 <- fatalityOverall[
  which(fatalityOverall$Year >= 2005 & fatalityOverall$Year <= 2015),
]
# 2005 table: remove column X5, label the remaining columns, drop rows 1 and 9
fatality2005 <- fatality2005[names(fatality2005) != "X5"]
names(fatality2005) <- c("State", "Civilians", "Security", "Terrorists")
fatality2005 <- fatality2005[-c(1, 9), ]
# Tidy one scraped year-wise table.
#
# df: data frame as returned by html_table(); a column named X5, if present,
#     is removed.
# Returns df without X5, with its columns renamed to State, Civilians,
# Security and Terrorists, and with rows 1 and 9 removed.
clean_Data <- function(df) {
  tidy <- df[names(df) != "X5"]
  names(tidy) <- c("State", "Civilians", "Security", "Terrorists")
  tidy[-c(1, 9), ]
}
# Clean the 2006-2016 tables with clean_Data(); fatality2005 was already
# cleaned earlier in the script, so it is used as it stands.
yearlyRaw <- list(
  "2006" = fatality2006,
  "2007" = fatality2007,
  "2008" = fatality2008,
  "2009" = fatality2009,
  "2010" = fatality2010,
  "2011" = fatality2011,
  "2012" = fatality2012,
  "2013" = fatality2013,
  "2014" = fatality2014,
  "2015" = fatality2015,
  "2016" = fatality2016
)
yearlyClean <- c(list("2005" = fatality2005), lapply(yearlyRaw, clean_Data))
# Added Year column (taken from the list names, stored as a number)
yearlyClean <- Map(
  function(tbl, yr) {
    tbl$Year <- yr
    tbl
  },
  yearlyClean,
  as.numeric(names(yearlyClean))
)
# Merging Data
# A single rbind over all years replaces growing the data frame one year at a
# time; unname() keeps the list names out of the row names.
fatality10 <- do.call(rbind, unname(yearlyClean))
# Saving data in local disk
write.csv(fatality10, "neInsurgency05_16.csv", row.names = FALSE)
# Manipur
# Keep 2005-2015 by Year rather than by row position, so a missing or extra
# row for the state cannot pull 2016 (or an all-NA row) into the data.
manipur <- fatality10 %>%
  filter(State == "Manipur", Year <= 2015) %>%
  mutate(
    Civilians = as.numeric(Civilians),
    Security = as.numeric(Security),
    Terrorists = as.numeric(Terrorists)
  )
# Visualization
# Reshape the three fatality columns (2:4) into Fatality/Count pairs
fatalityOverallLong <- gather(fatality2005_2015, "Fatality", "Count", 2:4)
fatalityOverallLong$Count <- as.numeric(fatalityOverallLong$Count)
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
ggplot(fatalityOverallLong, aes(x = Year, y = Count, colour = Fatality)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue")) +
  ggtitle("Fatalities in Northeast India due to Insurgency between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Manipur: one line per fatality category (columns 2:4) across the years
manipurLong <- gather(manipur, "Fatality", "Count", 2:4)
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
ggplot(manipurLong, aes(x = Year, y = Count, colour = Fatality)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue")) +
  ggtitle("Insurgency Fatalities in Manipur between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Nagaland
# Keep 2005-2015 by Year rather than by row position, so a missing or extra
# row for the state cannot pull 2016 (or an all-NA row) into the data.
nagaland <- fatality10 %>%
  filter(State == "Nagaland", Year <= 2015) %>%
  mutate(
    Civilians = as.numeric(Civilians),
    Security = as.numeric(Security),
    Terrorists = as.numeric(Terrorists)
  )
nagalandLong <- gather(nagaland, "Fatality", "Count", 2:4)
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
ggplot(nagalandLong, aes(x = Year, y = Count, colour = Fatality)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue")) +
  ggtitle("Insurgency Fatalities in Nagaland between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Visualization by Fatality
# Year is stored as a number, so compare it with a number instead of "2016"
fatalityNO2016 <- subset(fatality10, Year != 2016)
# Reshape the three fatality columns (2:4) into Fatality/Count pairs
fatality10Long <- gather(fatalityNO2016, "Fatality", "Count", 2:4)
fatality10Long$Count <- as.numeric(fatality10Long$Count)
civilians <- subset(fatality10Long, Fatality == "Civilians")
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# The manual colour scale supplies seven colours, one per State level
ggplot(civilians, aes(x = Year, y = Count, colour = State)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue",
                                "slategrey", "violetred4",
                                "orchid", "olivedrab1")) +
  ggtitle("Civilian Fatalities due to Insurgency in NE India between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Security force fatalities per state and year
security <- subset(fatality10Long, Fatality == "Security")
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# The manual colour scale supplies seven colours, one per State level
ggplot(security, aes(x = Year, y = Count, colour = State)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue",
                                "slategrey", "violetred4",
                                "orchid", "olivedrab1")) +
  ggtitle("Security Fatalities due to Insurgency in NE India between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Terrorist fatalities per state and year
terrorist <- subset(fatality10Long, Fatality == "Terrorists")
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# The manual colour scale supplies seven colours, one per State level
ggplot(terrorist, aes(x = Year, y = Count, colour = State)) +
  geom_line(stat = "identity", size = 1.5) +
  scale_x_continuous(breaks = seq(2005, 2015, 1)) +
  scale_color_manual(values = c("firebrick", "gold", "midnightblue",
                                "slategrey", "violetred4",
                                "orchid", "olivedrab1")) +
  ggtitle("Terrorist Fatalities due to Insurgency in NE India between 2005 - 2015") +
  labs(x = "", y = "Fatality") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Summary of fatality data
# Terrorist fatality
# The merged table still holds the counts as text, and sum() refuses
# character input, so convert first. 2016 is left out so the totals match the
# 2005 - 2015 range stated in the title.
terrorist_fatality <- fatality10 %>%
  filter(Year <= 2015) %>%
  group_by(State) %>%
  summarise(TerroristFatality = sum(as.numeric(Terrorists)))
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# Horizontal bars, states ordered by their total
ggplot(terrorist_fatality, aes(x = reorder(State, TerroristFatality), y = TerroristFatality)) +
  geom_bar(stat = "identity", fill = "slategrey") +
  coord_flip() +
  theme_bw() +
  ggtitle("Terrorists Fatality by State between 2005 - 2015") +
  labs(x = "", y = "Fatality Count") +
  theme(plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Civilian fatality
# The merged table still holds the counts as text, and sum() refuses
# character input, so convert first. 2016 is left out so the totals match the
# 2005 - 2015 range stated in the title.
civilian_fatality <- fatality10 %>%
  filter(Year <= 2015) %>%
  group_by(State) %>%
  summarise(CivilianFatality = sum(as.numeric(Civilians)))
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# Horizontal bars, states ordered by their total
ggplot(civilian_fatality, aes(x = reorder(State, CivilianFatality), y = CivilianFatality)) +
  geom_bar(stat = "identity", fill = "slategrey") +
  coord_flip() +
  theme_bw() +
  ggtitle("Civilians Fatality by State between 2005 - 2015") +
  labs(x = "", y = "Fatality Count") +
  theme(plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
# Security fatality
# The merged table still holds the counts as text, and sum() refuses
# character input, so convert first. 2016 is left out so the totals match the
# 2005 - 2015 range stated in the title.
security_fatality <- fatality10 %>%
  filter(Year <= 2015) %>%
  group_by(State) %>%
  summarise(SecurityFatality = sum(as.numeric(Security)))
# Register the font under the family name the theme asks for; the Windows
# font database is looked up by that name. windowsFonts() only exists on
# Windows builds of R, hence the guard.
if (.Platform$OS.type == "windows") {
  windowsFonts(`Microsoft Tai Le` = windowsFont("Microsoft Tai Le"))
}
# Horizontal bars, states ordered by their total
ggplot(security_fatality, aes(x = reorder(State, SecurityFatality), y = SecurityFatality)) +
  geom_bar(stat = "identity", fill = "slategrey") +
  coord_flip() +
  theme_bw() +
  ggtitle("Security Force Personnel Fatality by State between 2005 - 2015") +
  labs(x = "", y = "Fatality Count") +
  theme(plot.title = element_text(hjust = -0.002, family = "Microsoft Tai Le"),
        text = element_text(family = "Microsoft Tai Le"),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        panel.grid.minor = element_blank())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment