Skip to content

Instantly share code, notes, and snippets.

@febriliankr
Created October 14, 2023 06:26
Show Gist options
  • Save febriliankr/40c379747a976252514a95e79d904940 to your computer and use it in GitHub Desktop.
Save febriliankr/40c379747a976252514a95e79d904940 to your computer and use it in GitHub Desktop.
Code displayed in RStudio for Health Research https://risetku.com/academy
# Basic arithmetic ----
# Each bare expression is evaluated and its value printed to the console.
1 + 2
10 / 2

# Logical literal ----
TRUE

# Variable assignment ----
# Bind the string "object" to the name `object`, then print its value.
object <- "object"
print(object)
# Install packages ----
# Install only the packages that are not already available, so re-running the
# script does not download and reinstall everything each time.
required_pkgs <- c("tidyverse", "medicaldata", "readxl")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach every package this section uses in one place.
library(tidyverse)
library(medicaldata)
library(readxl)

# Load data ----
# Print the copy of the dataset that ships with the medicaldata package.
medicaldata::covid_testing

# Path to the Excel file (resolved relative to the current working directory);
# alternatively, import it through the RStudio `Files` pane.
df <- read_excel("Downloads/risetku_rstudio/covid.xlsx")
View(df)

# Open the help page for the covid_testing dataset.
help(covid_testing)

# View data ----
head(df)        # first rows
glimpse(df)     # column names, types and a preview of the values
class(df$age)   # storage class of the age column
# Data cleaning ----

# Select data: return df without these four columns.
select(df, -age, -gender, -rec_ver_tat, -pan_day)

# Filter data: keep only the rows where gender is "female".
filter(df, gender == "female")

# The same two verbs chained with the pipe: keep four columns, then keep the
# "female" rows. Inside a pipe the data frame is supplied by `%>%` as the first
# argument, so it must NOT be passed again -- `filter(df, ...)` here would hand
# `df` to filter() as a filtering condition and fail.
df %>%
  select(age, gender, rec_ver_tat, pan_day) %>%
  filter(gender == "female")
# Remove missing data ----
# Drop every row of df that contains at least one NA.
df_cleaned <- na.omit(df)
view(df_cleaned)

# Reclassification ----
# Code pan_day into three groups (1, 2, 3) stored in a new column
# `waktu_pandemi`; the first matching condition wins.
df_cleaned <- mutate(
  df_cleaned,
  waktu_pandemi = case_when(
    pan_day < 34 ~ 1,
    33 < pan_day & pan_day < 67 ~ 2,
    66 < pan_day ~ 3
  )
)
view(df_cleaned)

# Group labelling ----
# Turn the numeric codes into labelled factors. For waktu_pandemi the first
# level ("1st 33-days") is the reference level; for drive_thru_ind code 1 is
# labelled "Yes" and code 0 is labelled "No".
df_cleaned <- df_cleaned %>%
  mutate(
    waktu_pandemi = factor(
      waktu_pandemi,
      levels = c(1, 2, 3),
      labels = c("1st 33-days", "2nd 33-days", "3rd 33-days")
    ),
    drive_thru_ind = factor(
      drive_thru_ind,
      levels = c(1, 0),
      labels = c("Yes", "No")
    )
  )
view(df_cleaned)
glimpse(df_cleaned)
# Remove outliers: collect-to-receive time (col_rec_tat) ----
summary(df_cleaned$col_rec_tat)
nrow(df_cleaned)
# 8303 (row count noted by the original author)

# Absolute z-score of col_rec_tat: |x - mean| / sd, stored as a new column.
df_cleaned <- df_cleaned %>%
  mutate(
    zscore_col_rec_tat =
      abs(col_rec_tat - mean(col_rec_tat)) / sd(col_rec_tat)
  )
view(df_cleaned)

# Keep only the rows whose absolute z-score is below 3.
df_cleaned <- subset(df_cleaned, zscore_col_rec_tat < 3)
nrow(df_cleaned)
# 8302 (row count noted by the original author)

# Remove outliers: receive-to-verification time (rec_ver_tat) ----
summary(df_cleaned$rec_ver_tat)
nrow(df_cleaned)
# 8302 (row count noted by the original author)

# Same procedure for rec_ver_tat, computed on the already-filtered data.
df_cleaned <- df_cleaned %>%
  mutate(
    zscore_rec_ver_tat =
      abs(rec_ver_tat - mean(rec_ver_tat)) / sd(rec_ver_tat)
  )
df_cleaned <- subset(df_cleaned, zscore_rec_ver_tat < 3)
nrow(df_cleaned)
# 8237 (row count noted by the original author)
# Exploratory analysis ----
library(ggpubr)

# Collect-to-receive time (col_rec_tat) by pan_day ----

# Number of rows per pan_day.
df_freq_col_rec_tat <- df_cleaned %>%
  group_by(pan_day) %>%
  summarize(Freq = n())
# Inspect the frequency table just built (the mean table does not exist yet).
view(df_freq_col_rec_tat)

# Mean col_rec_tat per pan_day.
df_mean_col_rec_tat <- df_cleaned %>%
  group_by(pan_day) %>%
  summarise(mean = mean(col_rec_tat))
view(df_mean_col_rec_tat)

# Keep only the `mean` column BEFORE combining, so the combined data frame has
# exactly one pan_day column plus the `mean` column that the plot maps to y.
df_mean_col_rec_tat <- select(df_mean_col_rec_tat, mean)

# Combine the two per-day summaries column-wise. Both were grouped by pan_day
# on the same data, so their rows line up.
df_col_rec_tat_viz <- cbind(df_freq_col_rec_tat, df_mean_col_rec_tat)
df_col_rec_tat_viz

# Plot: one point per pan_day, mean col_rec_tat on y, with point size and
# colour both mapped to the number of rows for that day.
ggplot(df_col_rec_tat_viz, aes(x = pan_day, y = mean)) +
  geom_point(alpha = 0.6, aes(size = Freq, color = Freq)) +
  labs(
    color = "Number of Patients",
    y = "Mean (hours)",
    x = "-Day Pandemic"
  ) +
  theme_bw()
# Receive-to-verification time (rec_ver_tat) by pan_day ----
# Summaries are computed on df_cleaned (missing values and outliers removed),
# matching the collect-to-receive plot; the raw `df` would bring back the rows
# that the cleaning steps removed.

# Number of rows per pan_day.
df_freq_rec_ver_tat <- df_cleaned %>%
  group_by(pan_day) %>%
  summarize(Freq = n())

# Mean rec_ver_tat per pan_day, reduced to the single `mean` column so the
# combined data frame below has only one pan_day column.
df_mean_rec_ver_tat <- df_cleaned %>%
  group_by(pan_day) %>%
  summarise(mean = mean(rec_ver_tat)) %>%
  select(mean)
view(df_mean_rec_ver_tat)

# Combine the two per-day summaries column-wise. Both were grouped by pan_day
# on the same data, so their rows line up.
df_rec_ver_tat_viz <- cbind(df_freq_rec_ver_tat, df_mean_rec_ver_tat)
df_rec_ver_tat_viz

# Plot: one point per pan_day, mean rec_ver_tat on y, with point size and
# colour both mapped to the number of rows for that day. The legend title is
# set on `color` because that is the aesthetic actually mapped (there is no
# `fill` mapping in this plot).
ggplot(df_rec_ver_tat_viz, aes(x = pan_day, y = mean)) +
  geom_point(alpha = 0.6, aes(size = Freq, color = Freq)) +
  labs(
    color = "Number of Patients",
    y = "Mean (hours)",
    x = "-Day Pandemic"
  ) +
  theme_bw()
# Data by group ----

# Basic bar plot of col_rec_tat per waktu_pandemi group (bars with the
# "mean_se" summary added), plus a group-comparison label placed at y = 3.5.
ggbarplot(
  df_cleaned,
  x = "waktu_pandemi",
  y = "col_rec_tat",
  add = "mean_se",
  color = "waktu_pandemi",
  palette = "jco"
) +
  stat_compare_means(label.y = 3.5)

# Bar plot of rec_ver_tat per waktu_pandemi group, with an ANOVA significance
# label placed at y = 6.
ggbarplot(
  df_cleaned,
  x = "waktu_pandemi",
  y = "rec_ver_tat",
  add = "mean_se",
  color = "waktu_pandemi",
  palette = "jco"
) +
  stat_compare_means(method = "anova", label.y = 6)
# Descriptive statistics ----
# Install table1 only when it is missing (the function is `install.packages`,
# plural).
if (!requireNamespace("table1", quietly = TRUE)) {
  install.packages("table1")
}
library(table1)

# Display labels and units for the table ----
# These must be set BEFORE the table is built; labels assigned after the
# table1() call would not appear in the already-created table.
label(df_cleaned$gender) <- "Sex"
label(df_cleaned$age) <- "Age"
label(df_cleaned$result) <- "Test Result"
label(df_cleaned$payor_group) <- "Payor"
label(df_cleaned$demo_group) <- "Demographic Group"
label(df_cleaned$drive_thru_ind) <- "Drive Thru"
label(df_cleaned$col_rec_tat) <- "Collect to Receive Time"
label(df_cleaned$rec_ver_tat) <- "Receive to Verification Time"
units(df_cleaned$age) <- "years"
units(df_cleaned$rec_ver_tat) <- "hours"
units(df_cleaned$col_rec_tat) <- "hours"

# Descriptive table ----
# One row block per listed variable, with columns split by waktu_pandemi,
# rendered with the "Rtable1-zebra" style class.
tabel_deskriptif <- table1(
  ~ age +
    gender +
    result +
    payor_group +
    demo_group +
    drive_thru_ind +
    col_rec_tat +
    rec_ver_tat | waktu_pandemi,
  data = df_cleaned,
  topclass = "Rtable1-zebra"
)
tabel_deskriptif
# Statistics ----
library(rstatix)

# Normality test ----
# Shapiro test of col_rec_tat within each waktu_pandemi group, with a
# significance column appended to the result.
normality <- add_significance(
  shapiro_test(group_by(df_cleaned, waktu_pandemi), col_rec_tat)
)
print(normality)

# Kruskal-Wallis test ----
# Compare col_rec_tat across the waktu_pandemi groups.
kruskal.test(col_rec_tat ~ waktu_pandemi, data = df_cleaned)

# Cross-tabulation ----
# drive_thru_ind in rows against result in columns, including row percentages.
library(sjPlot)
sjPlot::tab_xtab(
  var.row = df_cleaned$drive_thru_ind,
  var.col = df_cleaned$result,
  title = "Table Title",
  show.row.prc = TRUE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment