Skip to content

Instantly share code, notes, and snippets.

@jrosell
Created July 1, 2020 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrosell/9f1bfc21745653057b5bb2ee04ab7280 to your computer and use it in GitHub Desktop.
Save jrosell/9f1bfc21745653057b5bb2ee04ab7280 to your computer and use it in GitHub Desktop.
# Dependencies ----
# Install whichever required packages are missing, then attach them all.
# requireNamespace() only tests availability: unlike require(), it does not
# attach the package as a side effect, so a package that had to be installed
# ends up attached exactly like one that was already present. The library()
# calls stop with an error if a package is still unavailable.
pkgs <- c("tidyverse", "patchwork", "nycflights13", "tidymodels", "modelr")
missing_pkgs <- pkgs[
  !vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# Attach order matches the order in which the packages were previously
# attached, so name masking between packages is unchanged.
library(tidyverse)
library(patchwork)
library(nycflights13)
library(tidymodels)
library(modelr)
# Variation ----
# Bar chart of a categorical variable (cut), plus the same counts as a table
# with the most frequent level first.
p1 <- ggplot(diamonds, aes(x = cut)) +
  geom_bar() +
  labs(subtitle = "Gráfico de barras para variable catagórica")
diamonds %>%
  count(cut, sort = TRUE)

# Histogram of a continuous variable (carat) in bins 0.01 wide, plus the
# matching table of bin counts.
p2 <- ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.01) +
  labs(subtitle = "Histograma para variable contínua")
diamonds %>%
  count(cut_width(carat, 0.01), sort = TRUE)

# Frequency polygons of carat (bins 0.2 wide), one coloured line per cut,
# plus the matching table of counts per bin and cut.
p3 <- ggplot(diamonds, aes(x = carat, colour = cut)) +
  geom_freqpoly(binwidth = 0.2) +
  labs(subtitle = "Polígonos de frecuencia para covariación")
diamonds %>%
  count(cut_width(carat, 0.2), cut, sort = TRUE)

# Show the three plots side by side (patchwork composition).
p1 + p2 + p3
# Outliers ----
# Boxplot of y with an axis break every 2 units from 0 to 60, so extreme
# values can be read off the axis.
ggplot(diamonds, aes(x = y)) +
  geom_boxplot() +
  scale_x_continuous(breaks = seq(0, 60, by = 2))

# Rows whose y falls outside the closed interval [3, 12], reduced to the
# columns needed to inspect them and ordered by y.
inusual <- diamonds %>%
  filter(!between(y, 3, 12)) %>%
  select(price, x, y, z) %>%
  arrange(y)

# Copy of diamonds in which those out-of-range y values are replaced by NA;
# every other column and row is kept as is.
diamonds2 <- diamonds %>%
  mutate(y = if_else(between(y, 3, 12), y, NA_real_))
# Covariation ----
# Seven views of how two variables vary together, grouped by variable types.

# Two categorical variables: point size encodes the count per combination.
p1 <- ggplot(diamonds) +
  geom_count(aes(x = cut, y = color)) +
  labs(subtitle = "geom_count: 2 categóricas :)")

# Two categorical variables: tile fill encodes the precomputed count n.
p2 <- diamonds %>%
  count(color, cut) %>%
  ggplot(aes(x = color, y = cut)) +
  geom_tile(aes(fill = n)) +
  labs(subtitle = "geom_tile: 2 categóricas :)")

# One categorical, one continuous: raw counts per price bin (width 500),
# one line per cut.
p3 <- ggplot(diamonds, aes(x = price)) +
  geom_freqpoly(aes(colour = cut), binwidth = 500) +
  labs(subtitle = "geom_freqpoly: 1 categórica y 1 contínua :(")

# Same plot with density on the y axis instead of counts. after_stat()
# replaces the deprecated `..density..` notation, which emits a deprecation
# warning on ggplot2 >= 3.4.0.
p4 <- ggplot(diamonds, aes(x = price, y = after_stat(density))) +
  geom_freqpoly(aes(colour = cut), binwidth = 500) +
  labs(subtitle = "geom_freqpoly (density): 1 categórica y 1 contínua :)")

# One categorical, one continuous: horizontal boxplot of price for each cut.
p5 <- ggplot(diamonds, aes(x = price, y = cut)) +
  geom_boxplot() +
  labs(subtitle = "geom_boxplot: 1 categórica y 1 contínua :)")

# Two continuous variables: scatterplot with heavy transparency (alpha 1/100)
# so overplotted regions show up as darker areas.
p6 <- ggplot(diamonds) +
  geom_point(aes(x = carat, y = price), alpha = 1 / 100) +
  labs(subtitle = "geom_point: 2 contínuas :)")

# Two continuous variables: rectangular 2D bins coloured by count.
p7 <- ggplot(diamonds) +
  geom_bin2d(aes(x = carat, y = price)) +
  labs(subtitle = "geom_bin2d: 2 contínuas :)")

# Three-row patchwork layout: categorical pairs, mixed pairs, continuous pairs.
(p1 + p2) / (p3 + p4 + p5) / (p6 + p7)
# Residuals ----
# Model specification: ordinary linear regression fitted with stats::lm.
# Fixed: the engine setter was misspelled `aet_engine`, which stopped the
# script with "could not find function".
lm_spec <- linear_reg() %>%
  set_engine("lm") %>%
  set_mode("regression")

# Fit log(price) as a linear function of log(carat).
lm_fit <- lm_spec %>%
  fit(log(price) ~ log(carat), data = diamonds)

# add_residuals() takes the underlying lm object, held in the `fit` element of
# the parsnip fit. The residuals are on the log scale, so exp() turns them
# back into the ratio of observed price to predicted price.
diamonds2 <- diamonds %>%
  add_residuals(lm_fit$fit) %>%
  mutate(resid = exp(resid))

# Price against carat on the raw scale, with a straight-line fit.
p1 <- ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# The same relationship on the log-log scale, with a straight-line fit.
p2 <- ggplot(diamonds, aes(x = log(carat), y = log(price))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Back-transformed residuals against carat.
p3 <- ggplot(diamonds2) +
  geom_point(aes(x = carat, y = resid))

# Back-transformed residuals by cut.
p4 <- ggplot(diamonds2) +
  geom_boxplot(aes(x = cut, y = resid))

# Two-by-two patchwork layout: fits on top, residual views below.
(p1 + p2) / (p3 + p4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment