Skip to content

Instantly share code, notes, and snippets.

View karan19100's full-sized avatar

Karan Shah karan19100

View GitHub Profile
@karan19100
karan19100 / dataset_detail.R
Created July 18, 2020 17:02
datasetdetails_by_karan shah
library(e1071) # it includes function to compute skewness
library(plyr) # it allows to wrangle data
library(ggplot2) # it allows to create a number of different types of plots
# Build the two-column working data set used by the later snippets.
# NOTE(review): `ais` is not created in this snippet - presumably the
# Australian athletes data (e.g. from the DAAG package); confirm it is loaded.
ais2 <- subset(ais, sex == "m") # only male athletes
# Select the columns by name instead of by position (3, 4) so the selection
# cannot silently pick up the wrong variables if the column order differs.
ais3 <- ais2[, c("hc", "hg")]
# Call plyr's rename() explicitly: its c(old = "new") convention differs from
# dplyr::rename(), which would mask it if dplyr were attached afterwards.
newdata <- plyr::rename(ais3, c("hg" = "HEMAGLOBIN", "hc" = "HEMATOCRIT"))
str(newdata)
summary(newdata) # overview of the two selected variables
@karan19100
karan19100 / scatter_plot.R
Created July 18, 2020 17:15
scatterplot_by_karanshah
# Scatter plot of HEMATOCRIT against HEMAGLOBIN from `newdata`.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0; this
# also drops the default black point layer that qplot() drew underneath the
# explicit blue one.
ggplot(newdata, aes(x = HEMAGLOBIN, y = HEMATOCRIT)) +
  geom_point(colour = "blue", size = 1.5) +
  ggtitle("HEMAGLOBIN and HEMATOCRIT relationship") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  scale_y_continuous(breaks = 30:65, minor_breaks = NULL) +
  scale_x_continuous(breaks = 10:25, minor_breaks = NULL)
@karan19100
karan19100 / Box_plot.R
Created July 18, 2020 18:17
Boxplot_by_karanshah
# Side-by-side boxplots of the two columns of `newdata`.
# par() returns the previous settings, which are restored at the end so the
# two-panel layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2)) # divide the graph area in two parts
boxplot(newdata$HEMAGLOBIN,
  col = "yellow", border = "blue",
  main = "HEMAGLOBIN boxplot",
  ylab = "g per decaliter"
)
boxplot(newdata$HEMATOCRIT,
  col = "orange", border = "blue",
  main = "HEMATOCRIT boxplot", # title was misspelled "HEMATROCRIT"
  ylab = "percent values"
)
par(old_par)
@karan19100
karan19100 / histogram.R
Created July 18, 2020 18:43
Histogram_by_karanshah
# Histogram of HEMAGLOBIN
# Bins the HEMAGLOBIN column of `newdata` in steps of 0.5, centres the title,
# sets explicit axis labels and axis breaks, and starts a vertical line at the
# mean of HEMAGLOBIN.
# NOTE(review): the snippet is cut off mid-call on its last line (the
# geom_vline() arguments stop after `color`), so it does not parse as shown;
# recover the remaining arguments from the original gist before running.
# NOTE(review): `show_guide` appears to be the old spelling of `show.legend`,
# and qplot() is deprecated in recent ggplot2 - confirm against the installed
# version when completing the call.
qplot(HEMAGLOBIN, data = newdata, geom="histogram", binwidth=0.5,
fill=I("azure4"), col=I("azure3")) +
labs(title = "HEMAGLOBIN") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x ="Concentration (in g per decaliter)") +
labs(y = "Frequency") +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35,40,45,50), minor_breaks = NULL) +
scale_x_continuous(breaks = c(10:25), minor_breaks = NULL) +
geom_vline(xintercept = mean(newdata$HEMAGLOBIN), show_guide=TRUE, color
@karan19100
karan19100 / histogram_2.R
Created July 18, 2020 18:46
histogram_bykaranshah
# Histogram of HEMATOCRIT
# Bins the HEMATOCRIT column of `newdata` in steps of 1, centres the title,
# sets explicit axis labels and axis breaks, and starts a vertical line at the
# mean of HEMATOCRIT.
# NOTE(review): the snippet is cut off mid-call on its last line (the
# geom_vline() arguments stop after `color`), so it does not parse as shown;
# recover the remaining arguments from the original gist before running.
# NOTE(review): `show_guide` appears to be the old spelling of `show.legend`,
# and qplot() is deprecated in recent ggplot2 - confirm against the installed
# version when completing the call.
qplot(HEMATOCRIT, data = newdata, geom="histogram", binwidth=1,
fill=I("azure4"), col=I("azure3")) +
labs(title = "HEMATOCRIT") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x ="percent values") +
labs(y = "Frequency") +
scale_y_continuous(breaks = c(0,5,10,15,20,25), minor_breaks = NULL) +
scale_x_continuous(breaks = c(30:65), minor_breaks = NULL) +
geom_vline(xintercept = mean(newdata$HEMATOCRIT), show_guide=TRUE, color
@karan19100
karan19100 / Densityplot.R
Created July 18, 2020 18:50
Densityplot_by_karanshah
# Kernel density plots of both columns of `newdata`, each annotated with its
# skewness (e1071::skewness) in the subtitle.
# par() returns the previous settings, which are restored at the end so the
# two-panel layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2)) # divide the graph area in two parts

# Estimate each density once and reuse it for both the outline and the fill.
# NOTE(review): the y axis of a density plot is a density, not a frequency;
# the original "Frequency" label is kept unchanged here.
hg_density <- density(newdata$HEMAGLOBIN)
plot(hg_density,
  main = "Density: HEMAGLOBIN", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(newdata$HEMAGLOBIN), 2))
)
polygon(hg_density, col = "yellow")

hc_density <- density(newdata$HEMATOCRIT)
plot(hc_density,
  main = "Density: HEMATOCRIT", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(newdata$HEMATOCRIT), 2))
)
polygon(hc_density, col = "orange")

par(old_par)
@karan19100
karan19100 / linear_model.R
Created July 18, 2020 19:00
build_linear_model_by_karanshah
# Show the relationship between HEMAGLOBIN and HEMATOCRIT with a fitted
# linear regression line (and its default confidence band) under the points.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0; this
# also drops the default black point layer that qplot() drew underneath.
ggplot(newdata, aes(x = HEMAGLOBIN, y = HEMATOCRIT)) +
  stat_smooth(method = "lm", col = "red", size = 1) +
  geom_point(colour = "blue", size = 1.5) + # drawn last so points sit on top
  ggtitle("HEMAGLOBIN and HEMATOCRIT relationship") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  scale_y_continuous(breaks = 30:65, minor_breaks = NULL) +
  scale_x_continuous(breaks = 10:25, minor_breaks = NULL)
@karan19100
karan19100 / linear_model_2.R
Created July 18, 2020 19:06
linear_model_by_karanshah
set.seed(123) # kept from the original; nothing in this snippet draws random numbers
# Centre HEMAGLOBIN on its mean. scale() returns a one-column matrix, so it is
# flattened to a plain numeric vector to behave like an ordinary variable in
# aes() and in model formulas (the variable name is unchanged).
HEMAGLOBIN_CENT <- as.numeric(
  scale(newdata$HEMAGLOBIN, center = TRUE, scale = FALSE)
)
# Show the relationship with the centred variable, adding a regression line.
# HEMAGLOBIN_CENT is a standalone vector, not a column of `newdata`; ggplot2
# resolves it from the calling environment.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0.
ggplot(newdata, aes(x = HEMAGLOBIN_CENT, y = HEMATOCRIT)) +
  stat_smooth(method = "lm", col = "red", size = 1) +
  geom_point(colour = "blue", size = 1.5) + # drawn last so points sit on top
  ggtitle("HEMAGLOBIN_CENT and HEMATOCRIT relationship") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  scale_y_continuous(breaks = 30:65, minor_breaks = NULL) +
  scale_x_continuous(breaks = seq(-2, 4, by = 0.5), minor_breaks = NULL)
@karan19100
karan19100 / linear_model_analysis.R
Created July 18, 2020 19:14
linear_model_analysis_by_karanshah
# Fit a simple linear regression of HEMATOCRIT on the centred hemoglobin
# variable and display the summary of the fitted model.
mod1 <- lm(formula = HEMATOCRIT ~ HEMAGLOBIN_CENT, data = newdata)
summary(object = mod1)
@karan19100
karan19100 / t-value.R
Created July 18, 2020 19:30
t-value_by_karanshah
# Recompute the slope's t statistic by hand from the fitted model `mod1`.
modSummary <- summary(mod1) # keep the whole summary object
modCoeff <- coef(modSummary) # coefficient table of the summary

# Pull the slope row once per quantity, then pick the column by name.
beta.estimate <- modCoeff["HEMAGLOBIN_CENT", ][["Estimate"]] # slope estimate
std.error <- modCoeff["HEMAGLOBIN_CENT", ][["Std. Error"]] # its standard error

# t statistic = estimate divided by its standard error
t_value <- beta.estimate / std.error
print(t_value)