Skip to content

Instantly share code, notes, and snippets.

@patperu
Created August 27, 2015 08:02
Show Gist options
  • Select an option

  • Save patperu/b55d14f200bd48e81e1f to your computer and use it in GitHub Desktop.

Select an option

Save patperu/b55d14f200bd48e81e1f to your computer and use it in GitHub Desktop.
STATISTICA SUI DATI DI GENERE - distribuzioni per sesso, età e cittadinanza per le 8 circoscrizioni
library('dplyr')
library('reshape2')
library('ggplot2')
library('Hmisc')
options(stringsAsFactors=FALSE)
# Download a zip archive and extract its contents into a local directory.
#
# Args:
#   m:     URL of the zip archive; handed to download.file().
#   mydir: Directory that receives the temporary zip file and the extracted
#          files. Defaults to the path this script was originally written for.
#
# Returns:
#   Invisibly, the status code of unlink() for the temporary zip file.
getFiles <- function(m, mydir = "E:/github/ewpal") {
  # tempfile() only builds a name; the download needs the directory to exist.
  if (!dir.exists(mydir)) {
    dir.create(mydir, recursive = TRUE)
  }
  temp <- tempfile(tmpdir = mydir, fileext = ".zip")
  # Make sure the archive is removed even if the download or unzip fails.
  on.exit(unlink(temp), add = TRUE)
  # Zip archives are binary; request a binary transfer explicitly.
  download.file(m, temp, mode = "wb")
  unzip(temp, exdir = mydir)
  unlink(temp) # delete the zip file
}
# Read one semicolon-separated CSV file and tag every row with the tokens
# encoded in its file name.
#
# The file name is split on single spaces and a trailing ".csv" is removed
# from the tokens. The tokens are repeated for every data row and appended as
# extra columns (X1..X4).
#
# Args:
#   file: Name of the CSV file. NOTE(review): the name is assumed to consist
#         of exactly four space-separated tokens (ncol = 4 below); confirm
#         this for every input file.
#
# Returns:
#   A data.frame holding the columns read from `file` (first line skipped, no
#   header) followed by the four file-name columns.
readFiles <- function(file) {
  dat <- read.csv2(file, skip = 1, header = FALSE,
                   stringsAsFactors = FALSE)
  parts <- unlist(strsplit(file, " ", fixed = TRUE))
  # Anchor and escape the extension: an unescaped ".csv" is a regex that would
  # also strip e.g. "xcsv" from the middle of any token.
  parts <- sub("\\.csv$", "", parts)
  # rep(each = ...) plus column-wise matrix filling yields one constant column
  # per file-name token.
  meta <- data.frame(matrix(rep(parts, each = nrow(dat)), ncol = 4),
                     stringsAsFactors = FALSE)
  data.frame(dat, meta, stringsAsFactors = FALSE)
}
###############################################################################
# Zip archives to download (URLs on the comune.palermo.it open-data server).
files <- c("http://www.comune.palermo.it/js/server/uploads/opendata/_30072015131336.zip",
           "http://www.comune.palermo.it/js/server/uploads/opendata/_30072015131412.zip",
           "http://www.comune.palermo.it/js/server/uploads/opendata/_30072015131425.zip",
           "http://www.comune.palermo.it/js/server/uploads/opendata/_30072015131435.zip")
# Download and extract the archives (disabled; run once to fetch the data).
# lapply(files, getFiles)
# List the current working directory, where the extracted CSV files are
# expected to be.
m <- dir()
# Only pick up CSV inputs: a bare "circ" pattern would also match the
# "eta_media_circo.pdf" written at the end of this script when it is re-run.
x <- do.call("rbind",
             lapply(grep("circ.*\\.csv$", m, value = TRUE), readFiles))
# Age column, one column per district ("circ"), an "n.d." column, then the
# four columns readFiles() derives from the file name.
names(x) <- c("eta", "1 circ", "2 circ", "3 circ", "4 circ",
              "5 circ", "6 circ", "7 circ", "8 circ",
              "n.d.", "anno", "sesso", "origine", "region")
# Files whose name contains "quart", read the same way.
# NOTE(review): `z` is not used anywhere in the visible script.
z <- do.call("rbind",
             lapply(grep("quart.*\\.csv$", m, value = TRUE), readFiles))
# Reshape to long format; the "n.d." rows and column are dropped further below.
# The argument is `id.vars` (the original `idvars` was silently ignored, which
# left melt() to guess the id columns from the column types).
z1 <- melt(x, id.vars = c("eta", "anno", "sesso", "origine", "region"),
           factorsAsStrings = FALSE)
z1$variable <- as.character(z1$variable)
str(z1)
# Sum of all values recorded under age "n.d.".
sum(z1[z1$eta == "n.d.", "value"])
# Population and weighted mean age per year, sex and district column.
# NOTE(review): tbl_df() is deprecated in current dplyr in favour of
# as_tibble(); kept to stay compatible with the dplyr version in use.
z2 <- tbl_df(z1) %>%
  filter(!eta %in% c("n.d.", "Totale")) %>%
  filter(!variable %in% c("n.d.")) %>%
  group_by(anno, sesso, variable) %>%
  # + 0.5 presumably places each age at the midpoint of its one-year class;
  # assumes every remaining `eta` value parses as a number - TODO confirm.
  summarise(pop = sum(value),
            eta_media = wtd.mean(as.numeric(eta) + 0.5, value)) %>%
  ungroup()
z2
# Mean age over the years, one line per district column, one panel per sex.
ggplot(z2, aes(anno, eta_media, group = variable, color = variable)) +
  geom_line() +
  facet_wrap(~ sesso)
# Spot check of a single district; the column is `sesso` (the original
# `z2$sex` does not exist, so the subset was always empty).
z2[z2$variable == "1 circ" & z2$sesso == "maschi", ]
# Box plot of the mean age per year, split by sex.
p1 <- ggplot(z2, aes(anno, eta_media)) +
  geom_boxplot(aes(fill = sesso)) +
  ggtitle('STATISTICA SUI DATI DI GENERE ANNO 2011 - 2014 - 8 circoscrizione // Età media') +
  theme(plot.title = element_text(size = 12, face = "bold", vjust = 1))
p1
# Name the arguments explicitly instead of relying on partial matching of
# `file` to `filename`; 297 x 210 mm is A4 landscape.
ggsave("eta_media_circo.pdf", plot = p1, units = "mm",
       width = 297, height = 210)
#
# FINI
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment