# Distribution of Birth Year and Top 10 Participating Nations in
# Berlin Marathon 2014
#
# Reads "BerlinMarathon2014.csv" from the user's home directory and draws
# two plots:
#   1. a density-scaled histogram of `birth_date` with a density curve, and
#   2. a bar chart of the ten countries with the most rows in the data.
#
# Columns used: `birth_date` and `country`.
# NOTE(review): `birth_date` is presumably a numeric year of birth (the
# one-unit bin width and the axis label suggest so) -- confirm against the CSV.

library(ggplot2)
library(plyr)

# Read data ----

# Build the path explicitly instead of calling setwd(), so the script does
# not change the working directory of the session that runs it.
bm <- read.csv(
  path.expand(file.path("~", "BerlinMarathon2014.csv")),
  header = TRUE
)

# Distribution of birth year ----

# after_stat(density) replaces the deprecated `..density..` notation
# (requires ggplot2 >= 3.3.0); it puts the histogram on the same scale as
# the overlaid density curve.
p <- ggplot(bm, aes(birth_date, after_stat(density))) +
  geom_histogram(binwidth = 1, colour = "black", fill = "lightblue") +
  geom_density() +
  ggtitle("Distribution of Birth Year for the Berlin Marathon 2014") +
  xlab("Year of Birth") +
  ylab("Density")

# Print explicitly so the plot is also drawn when the file is source()d.
print(p)

# Top 10 participating nations ----

# One row per country; `count` is the number of rows for that country.
top10 <- ddply(bm, "country", summarise, count = length(country))
top10 <- head(arrange(top10, desc(count)), n = 10)

# Order the bars from the largest to the smallest count.
p <- ggplot(top10, aes(x = reorder(country, -count), y = count)) +
  geom_bar(stat = "identity", colour = "black", fill = "lightblue") +
  ggtitle("Top 10 Participating Nations in Berlin Marathon 2014") +
  xlab("Country") +
  ylab("Participants")

print(p)