Skip to content

Instantly share code, notes, and snippets.

View cimentadaj's full-sized avatar

Jorge Cimentada cimentadaj

View GitHub Profile
#' Mean of one pollutant across selected monitor files
#'
#' Reads the files in `directory` that `id` picks out, stacks them into one
#' data frame, and returns the mean of the `pollutant` column with missing
#' values ignored.
#'
#' @param directory Path to the folder holding the monitor CSV files.
#' @param pollutant Name of the column to average, e.g. "sulfate" or "nitrate".
#' @param id Positions of the files to read, counted in the order returned by
#'   list.files(). NOTE(review): this assumes position i holds monitor i
#'   (for instance zero-padded file names) -- confirm against the data folder.
#' @return A single numeric value: the mean over all non-missing readings.
pollutantmean <- function(directory, pollutant, id = 1:332) {
  stopifnot(is.character(pollutant), length(pollutant) == 1L)

  data.files <- list.files(directory, full.names = TRUE)

  # Only the requested files are read; indexing past the end of the listing
  # yields NA, which is reported here instead of failing inside read.csv().
  selected <- data.files[id]
  if (length(selected) == 0L || anyNA(selected)) {
    stop("`id` must select files that exist in `directory`", call. = FALSE)
  }

  # Read everything first and bind once rather than growing a data frame
  # inside a loop.
  alldata <- do.call(rbind, lapply(selected, read.csv))

  if (!pollutant %in% names(alldata)) {
    stop("no column named '", pollutant, "' in the monitor files",
         call. = FALSE)
  }

  # `[[` looks the column up by the value of `pollutant`; `$pollutant` would
  # look for a column literally called "pollutant".
  mean(alldata[[pollutant]], na.rm = TRUE)
}
#' Q-Q plot of observed against expected -log10 p-values
#'
#' Sorts the p-values, pairs the i-th smallest of n with the quantile i/n,
#' and plots both on the -log10 scale together with a red y = x line.
#'
#' @param pvector Numeric vector of p-values; sort() drops missing values.
#' @param main Plot title handed to plot(); NULL for no title.
#' @param ... Further arguments forwarded to plot().
#' @return Called for its drawing side effect; the value is whatever lines()
#'   returns.
ggd.qqplot <- function(pvector, main = NULL, ...) {
  o <- -log10(sort(pvector, decreasing = FALSE))
  if (length(o) == 0L) {
    stop("`pvector` contains no non-missing p-values", call. = FALSE)
  }
  e <- -log10(seq_along(o) / length(o))

  # The y axis must reach the largest observed value, otherwise the most
  # significant points fall outside the plotting region. Non-finite values
  # (for example from p = 0) are left out so the limit stays usable.
  y_max <- max(e, o[is.finite(o)])

  plot(e, o, pch = 19, cex = 1, main = main, ...,
       xlab = expression(Expected~~-log[10](italic(p))),
       ylab = expression(Observed~~-log[10](italic(p))),
       xlim = c(0, max(e)), ylim = c(0, y_max))
  lines(e, e, col = "red")
}
#' Q-Q plot of observed against expected -log10 p-values
#'
#' NOTE(review): an identical definition of this function appears earlier in
#' the file; when the file is sourced, this later one is the one that stays.
#'
#' The p-values are sorted, the i-th smallest of n is paired with the
#' quantile i/n, and both are drawn on the -log10 scale with a red y = x line.
#'
#' @param pvector Numeric vector of p-values; sort() drops missing values.
#' @param main Plot title handed to plot(); NULL for no title.
#' @param ... Further arguments forwarded to plot().
#' @return Called for its drawing side effect; the value is whatever lines()
#'   returns.
ggd.qqplot <- function(pvector, main = NULL, ...) {
  observed <- -log10(sort(pvector, decreasing = FALSE))
  n <- length(observed)
  if (n == 0L) {
    stop("`pvector` contains no non-missing p-values", call. = FALSE)
  }
  expected <- -log10(seq_len(n) / n)

  # Size the y axis from the observed values as well as the expected ones so
  # that the smallest p-values are not drawn outside the plot. Non-finite
  # values (for example from p = 0) are ignored when choosing the limit.
  y_top <- max(expected, observed[is.finite(observed)])

  plot(expected, observed, pch = 19, cex = 1, main = main, ...,
       xlab = expression(Expected~~-log[10](italic(p))),
       ylab = expression(Observed~~-log[10](italic(p))),
       xlim = c(0, max(expected)), ylim = c(0, y_top))
  lines(expected, expected, col = "red")
}
# Faceted histograms of the galton data, one panel per original column.
library(UsingR)
library(reshape)
# ggplot() is used below, so attach ggplot2 explicitly instead of relying on
# another package to attach it as a side effect.
library(ggplot2)

data(galton)
# melt() stacks the columns of galton into `variable` / `value` pairs, which
# is the shape the aesthetics below expect.
long <- melt(galton)

g <- ggplot(long, aes(x = value, fill = variable)) +
  geom_histogram(colour = "black", binwidth = 1) +
  facet_grid(. ~ variable)
g
# First let's download some data files which we will use in the tutorial.
library(ggplot2) # install.packages("ggplot2") if you don't have the package
library(downloader) # install.packages("downloader") if you don't have the package
# Create and set a new directory for the tutorial.
# Guarded so the script can be re-run in the same session: without the check
# a second run would create and enter ggplot2tutorial/ggplot2tutorial.
if (basename(getwd()) != "ggplot2tutorial") {
  tutorial_dir <- file.path(getwd(), "ggplot2tutorial")
  if (!dir.exists(tutorial_dir)) {
    dir.create(tutorial_dir)
  }
  setwd(tutorial_dir)
}
# Download the zip file with all the data and unzip it to the tutorial folder
# NOTE(review): the download/unzip code announced above is not present in this
# part of the file -- confirm it exists before the plots that use `housing`.
## Keep the base plot (data + aesthetics) in a variable, then add a layer.
## NOTE(review): `housing` is not created in this part of the file --
## presumably it is loaded by an earlier step; confirm.
graph1 <- ggplot(housing, aes(Home.Value, Structure.Cost))
graph1 + geom_point()

## Two continuous variables: points, then a line through the same values
ggplot(housing, aes(Home.Value, Structure.Cost)) +
  geom_point()
ggplot(housing, aes(Home.Value, Structure.Cost)) +
  geom_line()

## One continuous and one categorical: a boxplot per region
ggplot(housing, aes(region, Home.Value)) +
  geom_boxplot()

## One categorical: a bar per region
ggplot(housing, aes(region)) +
  geom_bar()

## One continuous: histogram and dot plot
ggplot(housing, aes(Home.Value)) +
  geom_histogram()
ggplot(housing, aes(Land.Value)) +
  geom_dotplot()
## This plots the distribution of the Structure.Cost.
## geom_histogram() splits a continuous variable into 'bins' and draws one bar
## per bin showing how many observations fall into it. The binning is done by
## a statistic called stat_bin().
## (geom_bar() is not used here: it counts each distinct value with
## stat_count() and does not bin a continuous variable.)
ggplot(housing, aes(x = Structure.Cost, color = region)) + geom_histogram()
## What will the output of this code be?
ggplot(housing, aes(x = Structure.Cost, color = region)) + stat_bin()
## Each plot below is built from a stat_*() layer; the trailing comment names
## the geom_*() call that gives the same picture.

## Two continuous variables
ggplot(housing, aes(Home.Value, Structure.Cost)) +
  stat_identity() # Is the same as geom_point()
ggplot(housing, aes(Home.Value, Structure.Cost, color = region)) +
  stat_smooth() # Is the same as geom_smooth()

## One continuous and one categorical
ggplot(housing, aes(region, Home.Value)) +
  stat_boxplot() # Is the same as geom_boxplot()
ggplot(housing, aes(region, Structure.Cost, color = region)) +
  stat_ydensity() # Is the same as geom_violin()

## One categorical
ggplot(housing, aes(region)) +
  stat_count() # Is the same as geom_bar()