Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Exploratory visual analysis ----
# First look at the data: column names, the first ten rows, the structure
# and the per-column summary statistics.
# NOTE(review): `diamonds` is not created in this script; presumably it is the
# dataset shipped with ggplot2, which then has to be attached first - confirm.
names(diamonds)
head(x = diamonds, n = 10)
str(object = diamonds)
summary(object = diamonds)
# Make sure you have set your preferred theme as the default.
# NOTE(review): main2_color is not defined in this section; presumably it comes
# from the same setup step as the theme - confirm before running.

# One-dimensional distributions ----

# Frequency polygon
# Checking the options
help("geom_freqpoly")

# Simple: carat with the default binning - play around with binwidth
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_freqpoly(colour = main2_color)

# Changing the binwidth
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_freqpoly(binwidth = 0.001, colour = main2_color)

# One frequency polygon per cut, with cut encoded as the line colour
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_freqpoly() +
  scale_color_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  )

# How to zoom by defining the limits for the x axis
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_freqpoly() +
  scale_color_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  ) +
  xlim(0, 2)
# Histogram ----
# Checking the options
help("geom_histogram")

# Simple: carat - the same distribution drawn as an outlined histogram,
# limited to the 0-2 range on the x axis
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.01, fill = NA, colour = main2_color) +
  xlim(0, 2)

# Histogram for the different cut options, bars placed side by side
ggplot(data = diamonds, mapping = aes(x = carat, fill = cut)) +
  geom_histogram(position = "dodge") +
  scale_fill_manual(
    values = c("#478adb", "#f20675", "#1ce3cd", "#bcc048", "#cccccc")
  )

# The whole idea of the grammar of graphics: layers are stacked on one plot.
# The histogram layer uses a fixed decoration colour, so only the frequency
# polygons pick up the per-cut colour scale.
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_histogram(
    colour = decoration_color,
    fill = decoration_color,
    alpha = 0.2,
    size = 0
  ) +
  geom_freqpoly() +
  scale_colour_manual(
    values = c("#478adb", "#f20675", "#1ce3cd", "#bcc048", "#cccccc")
  )
# Introducing the colour package ----
library(RColorBrewer)
# Overview of the first 20 palettes listed in brewer.pal.info
head(x = brewer.pal.info, n = 20)

# Colour set - ColorBrewer palette "Accent"
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_histogram(
    colour = decoration_color,
    fill = decoration_color,
    alpha = 0.2,
    size = 0
  ) +
  geom_freqpoly() +
  scale_colour_brewer(palette = "Accent")

# Colours in reverse order
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_histogram(
    colour = decoration_color,
    fill = decoration_color,
    alpha = 0.2,
    size = 0
  ) +
  geom_freqpoly() +
  scale_colour_brewer(palette = "Accent", direction = -1)

# Colour set - ColorBrewer palette "Set1", with a fainter histogram layer
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_histogram(
    colour = decoration_color,
    fill = decoration_color,
    alpha = 0.07,
    size = 0
  ) +
  geom_freqpoly() +
  scale_colour_brewer(palette = "Set1")

# Zoom in again, this time with a manual colour scale
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_histogram(
    colour = decoration_color,
    fill = decoration_color,
    alpha = 0.2,
    size = 0
  ) +
  geom_freqpoly() +
  scale_colour_manual(
    values = c("#478adb", "#1ce3cd", "#f206d3", "#bcc048", "#cccccc")
  ) +
  xlim(0, 2)
# Multiple: carat - one histogram panel per cut to facilitate comparison.
# First the simple version with a single fill colour.
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram(fill = main2_color, binwidth = 0.2) +
  facet_wrap(. ~ cut)

# Small-multiple histograms of carat by cut, now with cut also encoded as the
# fill colour
ggplot(data = diamonds, mapping = aes(x = carat, fill = cut)) +
  geom_histogram(binwidth = 0.2) +
  scale_fill_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  ) +
  facet_wrap(. ~ cut)
# Density chart ----
# Checking the options
help("geom_density")

# Simple: price as a filled density chart without an outline
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_density(color = NA, fill = main2_color)

# Multiple density chart: one density per cut, distinguished by fill colour
ggplot(data = diamonds, mapping = aes(x = price, group = cut, fill = cut)) +
  geom_density(adjust = 1.5, color = NA) +
  scale_fill_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  )

# Multiple density chart using a single colour plus transparency: where the
# densities of the cuts overlap, the chart becomes visibly darker, so the
# overlap is easy to identify.
ggplot(data = diamonds, mapping = aes(x = price, group = cut, fill = cut)) +
  geom_density(adjust = 1.5, alpha = 0.1, color = NA, fill = main2_color)
# Small-multiple density charts of price, one panel per cut.
# geom_density() has no `binwidth` argument (its smoothing is controlled via
# `adjust` / `bw`), so the previously passed `binwidth = 0.2` was ignored with
# a warning and has been dropped. The explicit `stat(density)` y mapping is
# removed as well: the computed density is already the default y of
# geom_density(), and `stat()` is deprecated in current ggplot2 releases.
ggplot(diamonds, aes(x = price, fill = cut)) +
  geom_density(color = NA) +
  scale_fill_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  ) +
  facet_wrap(. ~ cut)
# Ridgeline plot ----
# NOTE(review): geom_density_ridges() is not attached anywhere in this script;
# presumably it comes from the ggridges package loaded elsewhere - confirm.
# Checking the options
help("geom_density_ridges")

# Another way of creating small multiples is using the ridgeline plot
ggplot(data = diamonds, mapping = aes(x = price, y = cut)) +
  geom_density_ridges(color = fill_color, fill = main2_color)

# Encoding cut as fill colour, ridges drawn without an outline
ggplot(data = diamonds, mapping = aes(x = price, y = cut, fill = cut)) +
  geom_density_ridges(color = NA) +
  scale_fill_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  )

# Introducing the scaling factor (scale = 8) together with a thin outline
ggplot(data = diamonds, mapping = aes(x = price, y = cut, fill = cut)) +
  geom_density_ridges(scale = 8, size = 0.1, color = fill_color) +
  scale_fill_manual(
    values = c("#478adb", "#cccccc", "#f20675", "#bcc048", "#1ce3cd")
  )
# Relationship analysis ----

# Scatterplot
# Checking the options
help("geom_point")

# Basic scatterplot
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color)

# Basic scatterplot - adjusting the point size
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.02)

# Basic scatterplot - adjusting the opacity
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.02, alpha = 0.09)

# Axis labelling depending on the quantiles: the axis titles are blanked and
# the breaks are placed at the quantiles of each variable, rounded to one
# decimal place.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.02, alpha = 0.09) +
  labs(x = "", y = "") +
  scale_x_continuous(
    breaks = round(unname(quantile(diamonds$carat)), digits = 1)
  ) +
  scale_y_continuous(
    breaks = round(unname(quantile(diamonds$price)), digits = 1)
  )

# Adding price as another visual encoding using a colour gradient
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = price)
) +
  geom_point(size = 0.04, alpha = 0.12) +
  scale_colour_gradient(low = main1_color, high = main2_color)
# Another way to handle big datasets is to work with a random sample.
# The RNG seed is fixed first so that the same 500 rows are drawn on every
# run and the resulting plot is reproducible.
set.seed(42)
diamonds_sample <- diamonds[sample(nrow(diamonds), size = 500), ]

# Basic scatterplot of the sample
ggplot(diamonds_sample, aes(x = carat, y = price)) +
  geom_point(color = main2_color)
# Basic scatterplot - colour as a redundant visual encoding of price
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = price)) +
  geom_point(size = 0.02)

# Basic scatterplot - size as a redundant visual encoding of price
ggplot(data = diamonds, mapping = aes(x = carat, y = price, size = price)) +
  geom_point(color = main2_color, alpha = 0.09)

# Change both position scales to logarithmic scaling
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.1, alpha = 0.09) +
  scale_y_log10() +
  scale_x_log10()

# Axis labelling depending on the quantiles (linear scales)
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.02, alpha = 0.09) +
  labs(x = "", y = "") +
  scale_x_continuous(
    breaks = round(unname(quantile(diamonds$carat)), digits = 1)
  ) +
  scale_y_continuous(
    breaks = round(unname(quantile(diamonds$price)), digits = 1)
  )

# Axis labelling depending on the quantiles for logarithmic scaling
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = main2_color, size = 0.02, alpha = 0.09) +
  labs(x = "", y = "") +
  scale_x_log10(
    breaks = round(unname(quantile(diamonds$carat)), digits = 1)
  ) +
  scale_y_log10(
    breaks = round(unname(quantile(diamonds$price)), digits = 1)
  )
# Adding a trend line on top of the points
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(size = 0.8, alpha = 0.09, color = main2_color) +
  stat_smooth(color = decoration_color)

# Small multiples - one variable (cut), panels arranged in two columns
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(size = 0.8, alpha = 0.09, color = main2_color) +
  stat_smooth(color = decoration_color) +
  facet_wrap(~ cut, ncol = 2)

# Small multiples - one variable with free scales per panel
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(size = 0.8, alpha = 0.09, color = main2_color) +
  stat_smooth(color = decoration_color) +
  facet_wrap(~ cut, ncol = 2, scales = "free")

# Small multiples - two variables (color and cut)
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(size = 0.8, alpha = 0.09, color = main2_color) +
  stat_smooth(color = decoration_color) +
  facet_wrap(color ~ cut)

# Exercise: now analyse the relationship of price with the variables depth
# and table
# Basic scatterplot of price against depth - adjusting the opacity
ggplot(data = diamonds, mapping = aes(x = depth, y = price)) +
  geom_point(color = main2_color, size = 0.02, alpha = 0.09)
# Hexagonal binning ----
# Checking the options
help("geom_hex")

# Aggregation through hexagonal binning - defining the number of bins and
# limiting the x axis to 0-5
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_hex(alpha = 0.6, bins = 40) +
  xlim(0, 5) +
  scale_fill_gradient(low = main2_color, high = main1_color)

# Aggregation through hexagonal binning - logarithmic scaling with breaks at
# the rounded quantiles
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_hex(alpha = 0.6) +
  scale_x_log10(
    breaks = round(unname(quantile(diamonds$carat)), digits = 1)
  ) +
  scale_y_log10(
    breaks = round(unname(quantile(diamonds$price)), digits = 1)
  ) +
  scale_fill_gradient(low = main2_color, high = main1_color)

# Checking the options
help("geom_bin2d")

# Heatmap based on rectangles
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d(bins = 50) +
  scale_fill_gradient(low = main2_color, high = main1_color)

# Heatmap based on rectangles - the same plot with semi-transparent tiles
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_bin2d(alpha = 0.6, bins = 50) +
  scale_fill_gradient(low = main2_color, high = main1_color)
# 2D density estimation ----
# Checking the options
help("stat_density_2d")

# Density estimation with contours, filled by level using the viridis scale
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon") +
  scale_fill_continuous(type = "viridis")

# Density estimation with contours, filled with the custom colour gradient
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon") +
  scale_fill_gradient(low = main2_color, high = main1_color)

# Adding a white stroke around each contour polygon
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  stat_density_2d(
    mapping = aes(fill = ..level..),
    geom = "polygon",
    colour = "white"
  ) +
  scale_fill_gradient(low = main2_color, high = main1_color)

# Exercise: choose a different dataset, check its distributions and create
# facets
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.