Skip to content

Instantly share code, notes, and snippets.

@avallecam
Last active June 12, 2020 07:22
Show Gist options
  • Save avallecam/38c8840bda652b1f28582f5dca09b0c4 to your computer and use it in GitHub Desktop.
Save avallecam/38c8840bda652b1f28582f5dca09b0c4 to your computer and use it in GitHub Desktop.
usa `skimr` y `compareGroups` para observar distribuciones y generar tabla 1 y tabla 2
``` r
library(tidyverse)
library(compareGroups)
library(skimr)
library(rlang)
library(labelled)
# Import and clean: turn the built-in Seatbelts series into a labelled tibble.
sb <- Seatbelts %>%
  as_tibble() %>%
  # Store `law` as a factor so it is handled as a categorical variable.
  mutate(law = as.factor(law)) %>%
  # Attach descriptive labels to the columns; `drivers` is deliberately left
  # unlabelled (its label is commented out below).
  set_variable_labels(
    DriversKilled = "Car drivers killed",
    # drivers = "Same as UKDriverDeaths",
    front = "Front-seat passengers killed or seriously injured",
    rear = "Rear-seat passengers killed or seriously injured",
    kms = "Distance driven",
    PetrolPrice = "Petrol price",
    VanKilled = "Number of van (light goods vehicle) drivers",
    law = "Was the law in effect that month?"
  )
# Inspect the labels (and factor levels) attached to each column of `sb`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
look_for(sb, details = TRUE) %>%
  select(variable, label, levels)
#> variable label levels
#> 1 DriversKilled Car drivers killed
#> 2 drivers <NA>
#> 3 front Front-seat passengers killed or seriously injured
#> 4 rear Rear-seat passengers killed or seriously injured
#> 5 kms Distance driven
#> 6 PetrolPrice Petrol price
#> 7 VanKilled Number of van (light goods vehicle) drivers
#> 8 law Was the law in effect that month? 0; 1
# Description and distribution: summarise every column of `sb` with skimr
# (missing/complete counts, mean, sd, quantiles and a text histogram for the
# numeric columns; level counts for the factor).
# Disabled alternative, kept for reference (presumably yields the numeric
# summary as a plain wide table -- not run here):
#   skim_to_wide() %>% select(variable,mean:p100) %>% mutate_at(vars(-variable),as.numeric)
sb %>% skim()
#> Skim summary statistics
#> n obs: 192
#> n variables: 8
#>
#> -- Variable type:factor --------------------------------------------------------
#> variable missing complete n n_unique top_counts ordered
#> law 0 192 192 2 0: 169, 1: 23, NA: 0 FALSE
#>
#> -- Variable type:numeric -------------------------------------------------------
#> variable missing complete n mean sd p0 p25 p50 p75 p100 hist
#> drivers 0 192 192 1670.31 289.61 1057 1461.75 1631 1850.75 2654 ▂▅▇▅▃▂▁▁
#> DriversKilled 0 192 192 122.8 25.38 60 104.75 118.5 138 198 ▁▃▇▇▅▃▁▁
#> front 0 192 192 837.22 175.1 426 715.5 828.5 950.75 1299 ▂▃▅▇▆▅▂▁
#> kms 0 192 192 14993.6 2938.05 7685 12685 14987 17202.5 21626 ▁▃▇▇▇▇▅▂
#> PetrolPrice 0 192 192 0.1 0.012 0.081 0.093 0.1 0.11 0.13 ▅▅▃▆▇▇▁▁
#> rear 0 192 192 401.21 83.1 224 344.75 401.5 456.25 646 ▂▅▆▇▆▃▁▁
#> VanKilled 0 192 192 9.06 3.64 2 6 8 12 17 ▂▆▇▇▆▇▅▁
# Stratified description and distribution: the same skimr summary, computed
# separately for each level of `law` (the grouping variable).
sb %>% group_by(law) %>% skim()
#> Skim summary statistics
#> n obs: 192
#> n variables: 8
#> group variables: law
#>
#> -- Variable type:numeric -------------------------------------------------------
#> law variable missing complete n mean sd p0 p25 p50 p75 p100 hist
#> 0 drivers 0 169 169 1717.75 266.89 1309 1511 1653 1926 2654 ▆▇▆▃▃▂▁▁
#> 0 DriversKilled 0 169 169 125.87 24.26 79 108 121 140 198 ▁▇▇▆▃▂▁▁
#> 0 front 0 169 169 873.46 151.54 567 767 860 986 1299 ▂▅▇▇▅▃▂▁
#> 0 kms 0 169 169 14463.3 2663.17 7685 12387 14455 16585 21040 ▁▃▇▇▇▇▅▁
#> 0 PetrolPrice 0 169 169 0.1 0.012 0.081 0.091 0.1 0.11 0.13 ▆▆▅▇▇▅▁▁
#> 0 rear 0 169 169 400.32 84.88 224 344 401 454 646 ▂▅▆▇▆▂▁▁
#> 0 VanKilled 0 169 169 9.59 3.5 2 7 10 13 17 ▁▅▇▇▇▇▅▂
#> 1 drivers 0 23 23 1321.7 199.72 1057 1171 1282 1463.5 1763 ▅▇▅▁▅▂▁▂
#> 1 DriversKilled 0 23 23 100.26 22.23 60 85 92 119 154 ▁▂▇▂▂▃▁▁
#> 1 front 0 23 23 570.96 81.29 426 516 585 629.5 721 ▃▅▅▅▆▇▂▃
#> 1 kms 0 23 23 18890.22 1657.61 15511 17971 19162 19952 21626 ▃▃▃▇▇▇▃▆
#> 1 PetrolPrice 0 23 23 0.12 0.002 0.11 0.11 0.12 0.12 0.12 ▃▇▅▅▁▇▅▃
#> 1 rear 0 23 23 407.74 69.92 296 347 408 471.5 521 ▅▃▂▃▂▁▇▂
#> 1 VanKilled 0 23 23 5.17 1.83 2 3.5 5 7 8 ▂▇▅▅▁▆▇▃
# Table 1: overall descriptives for every variable in `sb` (no grouping).
# Continuous variables are reported as mean±sd with one decimal; the factor
# `law` is reported as n (%).
# `show.n = FALSE` is written out in full because `F` is an ordinary variable
# that can be reassigned, whereas `FALSE` is a reserved word.
compareGroups(~ ., data = sb) %>%
  createTable(digits = 1, sd.type = 2, show.n = FALSE)
#>
#> --------Summary descriptives table ---------
#>
#> ________________________________________________________________
#> [ALL]
#> N=192
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#> Car drivers killed 122.8±25.4
#> drivers 1670.3±289.6
#> Front-seat passengers killed or seriously injured 837.2±175.1
#> Rear-seat passengers killed or seriously injured 401.2±83.1
#> Distance driven 14993.6±2938.0
#> Petrol price 0.1±<0.1
#> Number of van (light goods vehicle) drivers 9.1±3.6
#> Was the law in effect that month?:
#> 0 169 (88.0%)
#> 1 23 (12.0%)
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Table 2: descriptives of every other variable, split by the levels of `law`,
# with an overall p-value per row.
compareGroups(law ~ ., data = sb) %>%
  createTable(digits = 1, sd.type = 2)
#>
#> --------Summary descriptives table by 'Was the law in effect that month?'---------
#>
#> _________________________________________________________________________________________
#> 0 1 p.overall
#> N=169 N=23
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#> Car drivers killed 125.9±24.3 100.3±22.2 <0.001
#> drivers 1717.8±266.9 1321.7±199.7 <0.001
#> Front-seat passengers killed or seriously injured 873.5±151.5 571.0±81.3 <0.001
#> Rear-seat passengers killed or seriously injured 400.3±84.9 407.7±69.9 0.645
#> Distance driven 14463.3±2663.2 18890.2±1657.6 <0.001
#> Petrol price 0.1±<0.1 0.1±<0.1 <0.001
#> Number of van (light goods vehicle) drivers 9.6±3.5 5.2±1.8 <0.001
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Specify a non-parametric method: `method = c(extra = 2)` makes `extra` be
# summarised as median [Q1;Q3] instead of mean±sd.
# sleep %>% skim()
compareGroups(group ~ extra, data = sleep, method = c(extra = 2)) %>%
  createTable(digits = 1, sd.type = 2)
#>
#> --------Summary descriptives table by 'group'---------
#>
#> ____________________________________________
#> 1 2 p.overall
#> N=10 N=10
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#> extra 0.4 [-0.2;1.7] 1.8 [0.9;4.2] 0.064
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Specify row-wise percentages: with `byrow = TRUE` the percentages of a
# categorical row variable add up to 100% across the groups (per row) rather
# than within each group.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
# CO2 %>% skim()
set.seed(33) # fixed seed so the 50-row random sample is reproducible
CO2 %>%
  sample_n(50) %>%
  compareGroups(Treatment ~ Type + conc, data = ., byrow = TRUE) %>%
  createTable(digits = 1, sd.type = 2)
#>
#> --------Summary descriptives table by 'Treatment'---------
#>
#> _________________________________________________
#> nonchilled chilled p.overall
#> N=27 N=23
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#> Type: 1.000
#> Quebec 13 (54.2%) 11 (45.8%)
#> Mississippi 14 (53.8%) 12 (46.2%)
#> conc 325.9±197.6 496.1±306.8 0.028
#> ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Export both tables as Excel workbooks.
# The files are written inside the "table" folder; create it first (a no-op
# when it already exists) because export2xls() is not expected to create
# missing folders, so the export would otherwise fail in a fresh working
# directory.
dir.create("table", showWarnings = FALSE)

# Table 1: overall descriptives.
compareGroups(~ ., data = sb) %>%
  createTable(digits = 1, sd.type = 2) %>%
  export2xls("table/mi-tabla-01.xlsx")

# Table 2: descriptives by `law`.
compareGroups(law ~ ., data = sb) %>%
  createTable(digits = 1, sd.type = 2) %>%
  export2xls("table/mi-tabla-02.xlsx")
# Pending
# How can the levels of a categorical variable be reordered by frequency?
# Answer: use forcats::fct_infreq(); forcats::fct_reorder() instead orders
# the levels by the values of another variable.
# Improve
# If the p-values are not produced (too many categories or some other reason),
# pass chisq.test.perm = TRUE to compareGroups().
```
<sup>Created on 2019-09-11 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment