Skip to content

Instantly share code, notes, and snippets.

@eliardocosta
Last active October 27, 2023 11:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save eliardocosta/a5c39e218ff5899720f54735e8bf2d44 to your computer and use it in GitHub Desktop.
Save eliardocosta/a5c39e218ff5899720f54735e8bf2d44 to your computer and use it in GitHub Desktop.
Links e Scripts R para a disciplina EST0232 Fundamentos de Estatística Aplicada
# PART I ------------------------------------------------------------------
# Loading the data set
# Method 1: use the 'Import Dataset' button
# Method 2: use the commands below
# Install 'readxl' only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl) # load the 'readxl' package
# Placeholder path: replace it with the real location of the file.
# Forward slashes are used on purpose: inside an R string a single backslash
# starts an escape sequence, and "\c" is not a valid one, so the original
# Windows-style path made the script fail to parse.
dados_estud <- read_excel("C:/caminho/para/arquivo/dados-exemplo-estudantes.xls")
View(dados_estud)
dados_estud
dados_estud$Sexo
dados_estud[, 3]
# Put the columns on the search path so they can be used by bare name below.
attach(dados_estud)
# slide 16
Sexo
table(Sexo)
n <- length(Sexo) # number of observations
n
table(Sexo) / n
(table(Sexo) / n) * 100 # as a percentage
# slide 17
table(Toler)
table(Toler) / n
prop.table(table(Toler)) # same proportions, computed by prop.table()
(table(Toler) / n) * 100
# slide 18
table(Idade)
table(Idade) / n
prop.table(table(Idade))
(table(Idade) / n) * 100 # as a percentage
cumsum(table(Idade) / n) # cumulative relative frequency
(cumsum(table(Idade) / n)) * 100 # as a percentage
# slide 19
# plot = FALSE returns the histogram object (breaks, counts, ...) without
# drawing; right = FALSE makes the classes closed on the left.
tab_peso <- hist(Peso, plot = FALSE, right = FALSE,
                 nclass = 6)
tab_peso
tab_peso$counts
tab_peso$counts / n
cumsum(tab_peso$counts / n)
# slide 22
tab_tv <- hist(TV, plot = FALSE, right = FALSE, nclass = 5)
tab_tv
# Same table with explicitly chosen class limits.
tab_tv <- hist(TV, breaks = c(0, 6, 12, 18, 24, 36),
               plot = FALSE, right = FALSE)
tab_tv
# slide 23
barplot(table(Idade), xlab = "Idade", ylab = "Frequência",
        main = "Gráfico de barras para Idade",
        col = "grey") # frequency
barplot(table(Idade) / n, xlab = "Idade", ylab = "Frequência Relativa",
        main = "Gráfico de barras para Idade",
        col = "grey") # relative frequency
# slide 25
hist(Peso, right = FALSE, xlab = "Peso", ylab = "Frequência",
     main = "Histograma de Peso")
hist(Peso, freq = FALSE, right = FALSE) # density scale instead of counts
hist(Peso, right = FALSE, plot = FALSE)
# PART II -----------------------------------------------------------------
# Put the columns of 'dados_estud' on the search path so they can be used by
# bare name (Peso, Idade, Sexo, ...).
attach(dados_estud)

# slide 31 -- median, directly and as the 50% quantile
median(Peso)
quantile(Peso, probs = 0.5)

# slide 32 -- quartiles one by one, then the default five quantiles
quantile(Peso, probs = 0.25)  # Q1
quantile(Peso, probs = 0.5)   # Q2
quantile(Peso, probs = 0.75)  # Q3
quantile(Peso)

# slide 36 -- boxplots of simulated normal samples with different spread,
# drawn side by side on a common y scale
par(mfrow = c(1, 2))
x <- rnorm(1000)
boxplot(x, ylim = c(-4, 4))
x <- rnorm(1000, sd = 2)
boxplot(x, ylim = c(-4, 4))
par(mfrow = c(1, 1))
# Histogram of the second sample with the generating density on top.
hist(x, probability = TRUE)
plot(function(x) dnorm(x, sd = 2), -6, 6, col = "red", add = TRUE)
boxplot(Peso, ylab = "Peso (kg)")
boxplot(Peso, xlab = "Peso (kg)", horizontal = TRUE)

# slide 37 -- boxplots of Peso split by a grouping variable
boxplot(Peso ~ Sexo, ylab = "Peso (kg)")
boxplot(Peso ~ Sexo, xlab = "Peso (kg)", horizontal = TRUE)
boxplot(Peso ~ Turma, ylab = "Peso (kg)")
boxplot(Peso ~ Fuma, ylab = "Peso (kg)")

# slide 42
mean(Idade)
summary(Idade)

# slide 43 -- mean (built-in and by hand), extremes and range
mean(Peso)
n <- length(Peso)
sum(Peso) / n
min(Peso)
max(Peso)
summary(Peso)
max(Peso) - min(Peso)
# Two simulated samples with different standard deviations: boxplots on a
# common scale, then the two generating densities on one plot.
x1 <- rnorm(100, sd = 1)
x2 <- rnorm(100, sd = 4)
par(mfrow = c(1, 2))
boxplot(x1, ylim = c(-11, 11))
boxplot(x2, ylim = c(-11, 11))
par(mfrow = c(1, 1))
plot(function(x) dnorm(x), -11, 11, col = "blue")
plot(function(x) dnorm(x, sd = 4), -11, 11, col = "red", add = TRUE)

# slide 55 -- interquartile range as Q3 - Q1
quantile(Peso, probs = 0.75) - quantile(Peso, probs = 0.25)

# slide 56 -- variance and standard deviation (two equivalent ways)
var(Peso)
sqrt(var(Peso))
sd(Peso)
# Part III ----------------------------------------------------------------
# Put the columns of 'dados_estud' on the search path so they can be used by
# bare name below.
attach(dados_estud)
# Number of observations, used for the relative frequencies below. It is
# defined here so that this part does not depend on an 'n' left over from an
# earlier part of the script.
n <- nrow(dados_estud)
# slide 63 -- two-way frequency tables
table(Turma, Fuma)
table(Sexo, Fuma)
# slide 65 -- joint relative frequencies (by hand, with prop.table(), and as
# percentages)
table(Turma, Fuma) / n
prop.table(table(Turma, Fuma))
(table(Turma, Fuma) / n) * 100
table(Sexo, Fuma) / n
prop.table(table(Sexo, Fuma))
(table(Sexo, Fuma) / n) * 100
# slide 66 -- spine plots for pairs of categorical variables
spineplot(as.factor(Fuma) ~ as.factor(Turma), xlab = "Turma",
          ylab = "Fuma")
spineplot(as.factor(Turma) ~ as.factor(Fuma), xlab = "Fuma",
          ylab = "Turma")
spineplot(as.factor(Fuma) ~ as.factor(Toler), xlab = "Tolerância",
          ylab = "Fuma")
# slide 68 -- scatterplot with dashed reference lines at the two means
plot(Alt, Peso, xlab = "Altura (m)", ylab = "Peso (kg)")
abline(v = mean(Alt), lty = 2)
abline(h = mean(Peso), lty = 2)
# slide 72 -- correlation coefficients next to the matching scatterplots
cor(Alt, Peso)
plot(Idade, Peso)
cor(Peso, Idade)
plot(Alt, Peso)
cor(Peso, Alt)
# slide 75 -- summaries of Peso within each level of Sexo
aggregate(Peso ~ Sexo, dados_estud, mean)
aggregate(Peso ~ Sexo, dados_estud, sd)
aggregate(Peso ~ Sexo, dados_estud, min)
aggregate(Peso ~ Sexo, dados_estud, max)
aggregate(Peso ~ Sexo, dados_estud, summary)
# slide 76
boxplot(Peso ~ Sexo, ylab = "Peso (kg)", xlab = "Sexo")
# slide 77 -- conditional density plots of a categorical variable given Peso
cdplot(as.factor(Sexo) ~ Peso, xlab = "Peso (kg)", ylab = "Sexo")
cdplot(as.factor(Fuma) ~ Peso, xlab = "Peso (kg)", ylab = "Fuma")
cdplot(as.factor(Toler) ~ Peso, xlab = "Peso (kg)", ylab = "Tolerância")
# Part IV -----------------------------------------------------------------
# Put the columns of 'dados_estud' on the search path so they can be used by
# bare name below.
attach(dados_estud)

# slide 84 -- two-way table and its mosaic plot
table(Sexo, Fuma)
mosaicplot(
  table(Sexo, Fuma),
  color = TRUE, xlab = "Sexo", ylab = "Fuma", main = ""
)

# slide 85 -- the same with a third classifying variable
table(Sexo, Fuma, Turma)
mosaicplot(
  table(Sexo, Fuma, Turma),
  color = TRUE, xlab = "Sexo", ylab = "Fuma", main = ""
)

# slide 86 -- Peso against Alt conditioned on Idade, without and with a
# smoothing line in each panel
coplot(Peso ~ Alt | Idade, pch = 20, xlab = "Altura")
coplot(
  Peso ~ Alt | Idade,
  panel = panel.smooth, pch = 20, lty = 2, xlab = "Altura"
)

# slide 87 -- conditioning on two variables at once
coplot(
  Peso ~ Alt | Idade * Turma,
  panel = panel.smooth, pch = 20, lty = 2, xlab = "Altura"
)

# slide 88 -- same plot, points coloured by the level of Sexo
coplot(
  Peso ~ Alt | Idade * Turma,
  panel = panel.smooth,
  col = c("blue", "green")[as.factor(Sexo)],
  lty = 2, pch = 20, xlab = "Altura"
)
# Sexo: blue for 'F', green for 'M'

# slide 89
coplot(
  Peso ~ Alt | Sexo,
  panel = panel.smooth, lty = 2, pch = 20, xlab = "Altura"
)

# slide 90
coplot(
  Peso ~ Alt | Sexo * Turma,
  panel = panel.smooth, lty = 2, pch = 20, xlab = "Altura"
)
#-
coplot(
  Peso ~ Alt | Sexo * Turma,
  panel = panel.smooth, lty = 2, pch = 20,
  col = c("blue", "green")[as.factor(Fuma)]
)
# Fuma: blue for 'NAO', green for 'SIM'

# slide 91 -- scatterplot matrices, plain and with smoothing lines
pairs(~ Alt + Peso + Idade, pch = 20)
pairs(~ Alt + Peso + Idade, pch = 20, panel = panel.smooth, lty = 2)
# slide 92
# Panel function for pairs(): writes the absolute correlation between x and y
# in the centre of the panel, with a text size proportional to that value.
#
# Arguments:
#   x, y     numeric vectors with the data of the current panel.
#   digits   number of significant digits used to format the correlation.
#   prefix   string pasted in front of the formatted value.
#   cex.cor  base character expansion; when missing it is chosen from the
#            width of the text so that, before scaling by the correlation,
#            the text spans 0.8 of the panel width.
#   ...      accepted so extra arguments can be forwarded by the caller;
#            not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Save the user coordinates and restore them on exit. The parameter has to
  # be named: par(usr) with a bare numeric vector sets nothing and only
  # raises "argument 1 does not name a graphical parameter".
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Work in the unit square so the text can be centred at (0.5, 0.5).
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y))
  # Formatting r together with a long constant forces a common number of
  # digits; only the first element (r itself) is kept.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}
# Scatterplot matrix of Alt, Peso and Idade: panel.smooth above the diagonal,
# panel.cor (absolute correlations as text) below it.
pairs(
  ~ Alt + Peso + Idade,
  pch = 20,
  lower.panel = panel.cor,
  upper.panel = panel.smooth,
  lty = 2
)
# slide 93
# Diagonal panel function for pairs(): draws a histogram of x inside the
# current panel, with bar heights rescaled so the tallest bar has height 1.
#
# Arguments:
#   x    numeric vector with the data of the current panel.
#   ...  extra arguments forwarded to rect().
panel.hist <- function(x, ...) {
  # Save the user coordinates and restore them on exit. The parameter has to
  # be named: par(usr) with a bare numeric vector sets nothing and only
  # raises "argument 1 does not name a graphical parameter".
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Keep the x range of the panel; use 0..1.5 vertically so the bars
  # (max height 1) leave free space at the top of the panel.
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y / max(y)
  # One rectangle per class: left edges are all breaks but the last, right
  # edges are all breaks but the first.
  rect(breaks[-nB], 0, breaks[-1], y, col = "grey", ...)
}
# Scatterplot matrix with correlations below the diagonal, smoothed
# scatterplots above it and histograms on the diagonal.
pairs(
  ~ Alt + Peso + Idade,
  pch = 20,
  lower.panel = panel.cor,
  upper.panel = panel.smooth,
  diag.panel = panel.hist,
  lty = 2
)
#-
# Smoothed scatterplot matrix with points coloured by the level of Sexo.
pairs(
  ~ Alt + Peso + Idade,
  pch = 20,
  panel = panel.smooth,
  lty = 2,
  col = c("blue", "magenta")[as.factor(Sexo)]
)
# Sexo: blue for 'F', magenta for 'M'

# Animation
# Available at https://yihui.org/animation/example/least-squares/
# or through the commands below:
library(animation)
least.squares()
least.squares(ani.type = "intercept")
# slide 9 -- simple linear regression on a small data set typed in by hand
x <- c(1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 4.6, 1.6, 5.5, 3.4)
y <- c(0.7, -1.0, -0.2, -1.2, -0.1, 3.4, 0.0, 0.8, 3.7, 2.0)
plot(x, y)
# Least-squares fit; printing the object shows the estimated coefficients.
reg.ex1 <- lm(y ~ x)
reg.ex1
# Add the fitted line (dashed, red) to the scatterplot.
abline(reg.ex1, col = "red", lty = 2)
# slide 12 -- regression of 'dist' on 'speed' in the 'cars' data set
data(cars)
str(cars)
# The columns are reached through 'data = cars' instead of attach(cars), so
# the search path is left untouched and no attached column can mask (or be
# masked by) another object of the same name.
plot(dist ~ speed, data = cars)
reg.ex2 <- lm(dist ~ speed, data = cars)
reg.ex2
# Add the fitted line to the scatterplot.
abline(reg.ex2, col = "red")
# slide 14 -- regression of vo2 on tbc
tbc <- c(43.5, 44, 44, 44.5, 44, 45, 48, 49, 49.5, 51, 54.5, 57.5, 57.7,
         61, 63, 72)
vo2 <- c(22, 21, 22, 21.5, 25.5, 24.5, 30, 28, 32, 29, 38.5, 30.5, 57,
         40, 58, 72)
plot(tbc, vo2)
reg.ex31 <- lm(vo2 ~ tbc)
reg.ex31
abline(reg.ex31, col = "red")
# Same regression with tbc shifted so that its minimum is zero: the slope is
# unchanged and the intercept becomes the fitted value at min(tbc).
plot(tbc - min(tbc), vo2)
reg.ex32 <- lm(vo2 ~ I(tbc - min(tbc)))
reg.ex32
abline(reg.ex32, col = "red")
#- slide 19
# data
x <- c(
  5.61, 8.27, 7.68, 5.05, 7.09, 7.61, 7.12, 4.13, 8.52, 9.86, 2.22, 8.70,
  6.83, 6.54, 8.56, 4.01, 6.04, 8.94, 9.67, 6.37, 3.12, 9.64, 5.14, 4.15,
  6.58
)
y <- c(
  43.34, 158.18, 111.18, 34.32, 89.76, 77.28, 61.17, 15.38, 174.04, 391.17,
  7.14, 101.25, 73.09, 57.06, 146.79, 12.45, 38.65, 201.70, 254.11, 45.75,
  7.48, 272.43, 18.73, 17.36, 72.87
)
plot(x, y, ylim = c(-2, 401))

# linear regression
reg1.lin <- lm(y ~ x)
reg1.lin
abline(reg1.lin, col = "red")

# exponential regression: fit log(y) = log(alpha) + beta * x by least
# squares, then map the intercept back to the original scale
ly <- log(y)
reg1.exp <- lm(ly ~ x)
reg1.exp
alpha <- exp(coef(reg1.exp)[1])
alpha
beta <- coef(reg1.exp)[2]
beta
# Overlay the fitted curve y = alpha * exp(beta * x) on the scatterplot.
plot(function(x) alpha * exp(beta * x), xlim = c(2, 11), col = "blue",
     add = TRUE)
#- slide 20
library(MASS)
data(steam)
plot(steam$Temp, steam$Press, xlab = "Temperatura", ylab = "Pressão")

# linear regression
reg2.lin <- lm(Press ~ Temp, data = steam)
reg2.lin
abline(reg2.lin, col = "red")

# exponential regression: least squares on log(Press), then the intercept is
# mapped back to the original scale
reg2.exp <- lm(I(log(Press)) ~ Temp, data = steam)
reg2.exp
alpha <- exp(coef(reg2.exp)[1])
alpha
beta <- coef(reg2.exp)[2]
beta
# Overlay the fitted curve alpha * exp(beta * Temp) on the scatterplot.
plot(function(x) alpha * exp(beta * x), xlim = c(0, 110), col = "blue",
     add = TRUE)
#- slide 21
# data
x <- c(
  1.35, 2.35, 2.13, 1.15, 1.91, 2.10, 1.92, 0.80, 2.45, 2.95, 0.08, 2.51,
  1.81, 1.70, 2.46, 0.75, 1.52, 2.60, 2.87, 1.64, 3.98, 9.69, 5.75, 4.88,
  7.01, 9.39, 9.54, 9.16, 9.62, 8.70
)
y <- c(
  1.04, 1.36, 1.26, 1.09, 1.27, 1.22, 1.21, 0.90, 1.36, 1.32, 0.47, 1.26,
  1.18, 1.17, 1.35, 0.87, 1.08, 1.37, 1.32, 1.14, 1.44, 1.90, 1.62, 1.60,
  1.82, 1.96, 2.07, 1.92, 2.10, 1.91
)
plot(x, y)

# linear regression
reg3.lin <- lm(y ~ x)
reg3.lin
abline(reg3.lin, col = "red")

# power regression: fit log(y) = log(alpha) + beta * log(x) by least squares,
# then map the intercept back to the original scale
ly <- log(y)
lx <- log(x)
reg3.pot <- lm(ly ~ lx)
reg3.pot
alpha <- exp(coef(reg3.pot)[1])
alpha
beta <- coef(reg3.pot)[2]
beta
# Overlay the fitted curve y = alpha * x^beta on the scatterplot.
plot(function(x) alpha * x^beta, xlim = c(0, 10), col = "blue", add = TRUE)
#- slide 22
# data
x <- c(
  1.35, 2.35, 2.13, 1.15, 1.91, 2.10, 1.92, 0.80, 2.45, 2.95, 0.08, 2.51,
  1.81, 1.70, 2.46, 0.75, 1.52, 2.60, 2.87, 1.64, 3.98, 9.69, 5.75, 4.88,
  7.01, 9.39, 9.54, 9.16, 9.62, 8.70
)
y <- c(
  0.19, 0.08, 0.09, 0.12, 0.09, 0.11, 0.11, 0.25, 0.08, 0.08, 0.41, 0.10,
  0.11, 0.11, 0.08, 0.35, 0.15, 0.07, 0.08, 0.13, 0.07, 0.03, 0.05, 0.05,
  0.04, 0.03, 0.03, 0.03, 0.03, 0.03
)
plot(x, y)

# linear regression
reg4.lin <- lm(y ~ x)
reg4.lin
abline(reg4.lin, col = "red")

# reciprocal regression: fit 1/y = alpha + beta * x by least squares
ry <- 1 / y
reg4.rec <- lm(ry ~ x)
reg4.rec
alpha <- coef(reg4.rec)[1]
beta <- coef(reg4.rec)[2]
# Overlay the fitted curve y = 1 / (alpha + beta * x) on the scatterplot.
plot(function(x) 1 / (alpha + beta * x), xlim = c(-1, 10), col = "blue",
     add = TRUE)
#-- Slides 8
# slide 10 -- sample of 50 measurements labelled "Alcance (cm)" in the plots
alc <- c(
  46.2, 46.3, 46.6, 46.3, 46.0, 46.9, 46.2, 46.4, 46.1, 46.5,
  46.7, 46.1, 46.5, 46.7, 46.4, 46.3, 46.3, 46.8, 46.7, 45.9,
  46.2, 46.3, 46.5, 46.6, 46.6, 46.6, 46.3, 46.4, 46.5, 46.4,
  46.1, 46.2, 46.0, 46.6, 46.2, 46.7, 46.4, 46.2, 46.1, 46.4,
  46.3, 46.3, 46.5, 46.0, 46.4, 46.0, 46.3, 46.5, 46.3, 46.4
)
# Histogram on the density scale with a normal curve that uses the sample
# mean and standard deviation.
hist(alc, probability = TRUE, xlab = "Alcance (cm)", main = "Histograma")
curve(
  dnorm(x, mean(alc), sd(alc)),
  add = TRUE, lty = 2, col = "blue"
)
boxplot(alc, ylab = "Alcance (cm)")

# slide 13
mean(alc)                    # point estimate
sd(alc) / sqrt(length(alc))  # estimated standard error of the mean

# slide 18 -- combining two estimates (0.270 with sd 0.02 and 0.295 with
# sd 0.01) using weights proportional to the inverse variances
cc <- (1 / 0.02^2) / (1 / 0.02^2 + 1 / 0.01^2)
cc
mu.com <- cc * 0.270 + (1 - cc) * 0.295
mu.com
sig2.com <- 1 / (1 / 0.02^2 + 1 / 0.01^2)
sig2.com
sqrt(sig2.com)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment