This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from scipy import stats | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.model_selection import RandomizedSearchCV, KFold | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import classification_report | |
from xgboost import XGBClassifier | |
# split data into train and test sets
def split_data(df: pd.DataFrame, parameters: dict) -> dict: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(patchwork) | |
# generate scatter for entire dataset | |
p_all <- scatter_fn(penguins, bill_length_mm, bill_depth_mm, "All Species") | |
# get species scatters from penguin_scatters dataframe | |
# Bind each stored plot to p_1, p_2, ... so it can be referenced by name.
# seq_along() follows however many plots the list column holds (and does
# nothing when it is empty, unlike a hard-coded 1:n), and [[i]] extracts
# the plot object directly instead of the two-step [i][[1]].
for (i in seq_along(penguin_scatters$plot)) {
  assign(paste0("p_", i), penguin_scatters$plot[[i]])
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Generic helper for a simple ggplot2 scatter plot
#'
#' Builds a point layer plus a `geom_smooth()` layer for two columns of
#' `df` and sets the plot title.
#'
#' @param df Data to plot; piped into `ggplot2::ggplot()`.
#' @param col1 Unquoted column name mapped to the x axis.
#' @param col2 Unquoted column name mapped to the y axis.
#' @param title Value passed to `ggplot2::labs(title = )`.
#' @return The plot object produced by adding the layers below.
scatter_fn <- function(df, col1, col2, title) {
  # aes() is namespaced like every other ggplot2 call in this function, so
  # the helper does not depend on ggplot2 being attached. The {{ }} embrace
  # forwards the caller's unquoted column names into the mapping.
  df %>%
    ggplot2::ggplot(ggplot2::aes(x = {{ col1 }}, y = {{ col2 }})) +
    ggplot2::geom_point() +
    ggplot2::geom_smooth() +
    ggplot2::labs(title = title)
}
# run function across species and store plots in a list column |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
penguins %>% | |
nest_by(species) | |
# A tibble: 3 x 2 | |
# Rowwise: species | |
species data | |
<fct> <list<tbl_df[,7]>> | |
1 Adelie [152 × 7] | |
2 Chinstrap [68 × 7] | |
3 Gentoo [124 × 7] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
penguins %>% | |
dplyr::group_by(species) %>% | |
tidyr::nest() %>% | |
dplyr::rowwise() | |
# A tibble: 3 x 2 | |
# Rowwise: species | |
species data | |
<fct> <list> | |
1 Adelie <tibble [152 × 7]> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(broom) | |
penguin_models <- penguins %>% | |
dplyr::group_by(species) %>% | |
dplyr::summarise(broom::glance(lm(body_mass_g ~ flipper_length_mm + bill_length_mm + bill_depth_mm))) # summarise model stats | |
penguin_models | |
# A tibble: 3 x 13 | |
species r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
penguin_models <- penguins %>% | |
dplyr::group_by(species) %>% | |
dplyr::summarise(model = list(lm(body_mass_g ~ flipper_length_mm + bill_length_mm + bill_depth_mm))) # store models in a list column | |
penguin_models | |
# A tibble: 3 x 2 | |
species model | |
<fct> <list> | |
1 Adelie <lm> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-species mean and sd of every column whose name ends in "mm".
# Naming the functions in the list gives the {.fn} part of each output name.
penguin_stats <- penguins %>%
  dplyr::group_by(species) %>%
  dplyr::summarise(
    dplyr::across(
      dplyr::ends_with("mm"),
      list(
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ sd(.x, na.rm = TRUE)
      ),
      # Output names are "<column without underscores or unit>_<function>".
      # The "_mm" suffix is anchored and listed first so the result does not
      # depend on how the regex engine chooses between overlapping
      # alternatives; {.col}/{.fn} are the documented placeholder names.
      .names = "{gsub('_mm$|_', '', .col)}_{.fn}"
    )
  )
penguin_stats | |
# A tibble: 3 x 7 | |
species billlength_mean billlength_sd billdepth_mean billdepth_sd flipperlength_mean flipperlength_sd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
penguin_stats <- penguins %>% | |
dplyr::group_by(species) %>% | |
dplyr::summarise(across(ends_with("mm"), # do this for any column ending in mm | |
list(~mean(.x, na.rm = TRUE), ~sd(.x, na.rm = TRUE)))) # calculate a mean and sd | |
penguin_stats | |
# A tibble: 3 x 7 | |
species bill_length_mm_1 bill_length_mm_2 bill_depth_mm_1 bill_depth_mm_2 flipper_length_mm_1 flipper_length_mm_2 | |
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
penguins_wide <- penguins_long %>% | |
tidyr::pivot_wider(names_from = c("part", "measure", "unit"), # pivot these columns | |
values_from = "value", # take the values from here | |
names_sep = "_") # combine col names using an underscore | |
penguins_wide | |
# A tibble: 344 x 9 | |
# Groups: species, island, sex, year [35] | |
species island sex year penguinid bill_length_mm bill_depth_mm flipper_length_mm body_mass_g |
NewerOlder