Skip to content

Instantly share code, notes, and snippets.

@eliardocosta
Last active December 5, 2023 18:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eliardocosta/f6c7f7b621340dda31bdc709717b42bf to your computer and use it in GitHub Desktop.
Save eliardocosta/f6c7f7b621340dda31bdc709717b42bf to your computer and use it in GitHub Desktop.
Links e Scripts R para a disciplina EST0323 Estatística Aplicada à Engenharia I
# upload do conjunto de dados
# Metodo 1: ir ao botao 'Import Dataset'
# Metodo 2: via comandos abaixo
install.packages("readxl") # instalando pacote 'readxl'
library(readxl) # carregando pacote 'readxl'
dados = read_excel("C:\caminho\para\arquivo\dados-estudantes.xlsx")
View(dados)
dados
dados$Fuma
dados[, 8]
attach(dados)
#-
Fuma
table(Fuma)
n = length(Fuma)
n
table(Fuma)/n
(table(Fuma)/n)*100 # em porcentagem
#-
barplot(table(Idade), xlab = "Idade", ylab = "Frequência",
main = "Gráfico de barras para Idade",
col = "grey") # frequencia
barplot(table(Idade)/n, xlab = "Idade", ylab = "Frequência Relativa",
main = "Gráfico de barras para Idade",
col = "grey") # frequencia relativa
#-
hist(Peso, right = FALSE, xlab = "Peso", ylab = "Frequência",
main = "Histograma de Peso")
hist(Peso, freq = FALSE, right = FALSE)
#-
quantile(Peso)
quantile(Peso, probs = 0.25) # Q1
quantile(Peso, probs = 0.50) # Q2
quantile(Peso, probs = 0.75) # Q3
#-
boxplot(Peso, ylab = "Peso (kg)")
boxplot(Peso, xlab = "Peso (kg)", horizontal = TRUE)
#-
boxplot(Peso ~ Fuma, ylab = "Peso (kg)")
boxplot(Peso ~ Fuma, xlab = "Peso (kg)", horizontal = TRUE)
#-
mean(Peso) # media
min(Peso) # minimo
max(Peso) # maximo
var(Peso) # variancia
sd(Peso) # desvio padrao
summary(Peso)
#-
table(Turma, Fuma)
table(Sexo, Fuma)
#-
plot(Alt, Peso, xlab = "Altura (m)", ylab = "Peso (kg)")
abline(v = mean(Alt), lty = 2)
abline(h = mean(Peso), lty = 2)
#-
cor(Alt, Peso)
#-
aggregate(Peso ~ Sexo, dados, mean)
aggregate(Peso ~ Sexo, dados, sd)
aggregate(Peso ~ Sexo, dados, min)
aggregate(Peso ~ Sexo, dados, max)
aggregate(Peso ~ Sexo, dados, summary)
#-
coplot(Peso ~ Alt | Idade, panel = panel.smooth, pch = 20,
lty = 2, xlab = "Altura")
#-
coplot(Peso ~ Alt | Idade * Turma, panel = panel.smooth, pch = 20,
lty = 2, xlab = "Altura")
#-
coplot(Peso ~ Alt | Idade * Turma, panel = panel.smooth,
col = c("blue", "green")[as.factor(Sexo)], lty = 2,
pch = 20, xlab = "Altura")
# Sexo: blue para 'F', green para 'M'
#-
coplot(Peso ~ Alt | Fuma, panel = panel.smooth, lty = 2,
pch = 20, xlab = "Altura")
#-
coplot(Peso ~ Alt | Sexo * Turma, panel = panel.smooth,
lty = 2, pch = 20, xlab = "Altura")
#-
pairs(~ Alt + Peso + Idade, pch = 20, panel = panel.smooth, lty = 2)
# Dados de altura
x = c(1.60, 1.69, 1.85, 1.85, 1.58, 1.76, 1.60, 1.64, 1.62, 1.64, 1.72, 1.66, 1.70,
1.78, 1.65, 1.63, 1.82, 1.80, 1.60, 1.68, 1.70, 1.65, 1.57, 1.55, 1.69, 1.54,
1.62, 1.62, 1.57, 1.65, 1.61, 1.71, 1.65, 1.67, 1.73, 1.60, 1.70, 1.85, 1.70,
1.73, 1.70, 1.45, 1.76, 1.68, 1.55, 1.70, 1.55, 1.60, 1.80, 1.83)
N = length(x); N # tamanho da populacao
# Altura media ----------------------------------------------------------------------
# Altura media
mean(x)
# Amostra
n = 20
amostra = sample(x, size = n, replace = FALSE)
# Distribuicao amostral da media
B = 100
medb = numeric()
for (i in 1:B){
amostrab = sample(x, size = n, replace = FALSE)
medb[i] = mean(amostrab)
}
hist(medb, prob = TRUE)
abline(v = mean(x), col = "blue")
# Erro padrão da media amostral
sd(amostra)/sqrt(n)
# Bootstrap
B = 1000
medb = numeric()
for (i in 1:B){
amostrab = sample(amostra, size = n, replace = TRUE)
medb[i] = mean(amostrab)
}
sd(medb) # Erro padrao por bootstrap
# Coeficiente de variacao -----------------------------------------------------------
sd(x)/mean(x) # Valor real do CV
# Estimativa pontual para CV
sd(amostrab)/mean(amostrab)
# Bootstrap
B = 1000
cvb = numeric()
for (i in 1:B){
amostrab = sample(amostra, size = n, replace = TRUE)
cvb[i] = sd(amostrab)/mean(amostrab)
}
sd(cvb) # Erro padrao por bootstrap
# Correlacao linear -----------------------------------------------------------------
# Dados de peso
y = c(60.5, 55.0, 72.8, 80.9, 55.0, 60.0, 58.0, 47.0, 57.8, 58.0, 70.0, 54.0, 58.0,
68.5, 63.5, 47.4, 66.0, 85.2, 54.5, 52.5, 60.0, 58.5, 49.2, 48.0, 51.6, 57.0,
63.0, 52.0, 49.0, 59.0, 52.0, 73.0, 56.0, 58.0, 87.0, 47.0, 95.0, 84.0, 60.0,
73.0, 55.0, 44.0, 75.0, 55.0, 49.0, 50.0, 54.5, 50.0, 71.0, 86.0)
plot(x, y, ylab = "Peso (kg)", xlab = "Altura (m)")
# Correlacao linear (Pearson)
cor(x, y)
# Amostra
ind = sample(1:n, size = n, replace = FALSE)
amostrax = x[ind]
amostray = y[ind]
cor(amostrax, amostray)
# Bootstrap
B = 1000
corb = numeric()
for (i in 1:B){
ind = sample(1:n, size = n, replace = TRUE)
amostraxb = x[ind]
amostrayb = y[ind]
corb[i] = cor(amostraxb, amostrayb)
}
sd(corb) # Erro padrao por bootstrap
# Slide 21 --------------------------------------------------------------------------
# upload do conjunto de dados
# Metodo 1: ir ao botao 'Import Dataset'
# Metodo 2: via comandos abaixo
install.packages("readxl") # instalando pacote 'readxl'
library(readxl) # carregando pacote 'readxl'
dados = read_excel("C:\caminho\para\arquivo\dados-estudantes.xlsx")
View(dados)
n = length(dados$Alt)
sigma = 0.09
# 90%
alpha = 0.10
mean(dados$Alt) - qnorm(1 - alpha/2)*sigma/sqrt(n) # li
mean(dados$Alt) + qnorm(1 - alpha/2)*sigma/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qnorm(1 - alpha/2)*sigma/sqrt(n) # li e ls
# 95%
alpha = 0.05
mean(dados$Alt) - qnorm(1 - alpha/2)*sigma/sqrt(n) # li
mean(dados$Alt) + qnorm(1 - alpha/2)*sigma/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qnorm(1 - alpha/2)*sigma/sqrt(n) # li e ls
# 99%
alpha = 0.01
mean(dados$Alt) - qnorm(1 - alpha/2)*sigma/sqrt(n) # li
mean(dados$Alt) + qnorm(1 - alpha/2)*sigma/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qnorm(1 - alpha/2)*sigma/sqrt(n) # li e ls
# Slide 30 --------------------------------------------------------------------------
n = 8
alpha = 0.05
30.2 - qt(1 - alpha/2, df = n - 1)*3.1/sqrt(n) # li
30.2 + qt(1 - alpha/2, df = n - 1)*3.1/sqrt(n) # ls
# Slide 32 --------------------------------------------------------------------------
n = length(dados$Alt)
# 90%
alpha = 0.10
mean(dados$Alt) - qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # li
mean(dados$Alt) + qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n)
# 95%
alpha = 0.05
mean(dados$Alt) - qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # li
mean(dados$Alt) + qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n)
# 99%
alpha = 0.01
mean(dados$Alt) - qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # li
mean(dados$Alt) + qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # ls
mean(dados$Alt) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n)
# Slide 33 --------------------------------------------------------------------------
elas = c(10490, 16620, 17300, 15480, 12970, 17260, 13400, 13900,
13630, 13260, 14370, 11700, 15470, 17840, 14070, 14760)
n = length(elas)
alpha = 0.05
mean(elas) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(elas)/sqrt(n)
# Slide 34 --------------------------------------------------------------------------
carga = c(19.8, 10.1, 14.9, 7.5, 15.4, 15.4, 15.4, 18.5, 7.9, 12.7, 11.9,
11.4, 11.4, 14.1, 17.6, 16.7, 15.8, 19.5, 8.8, 13.6, 11.9, 11.4)
n = length(carga)
alpha = 0.01
mean(carga) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(carga)/sqrt(n)
# Slide 39 --------------------------------------------------------------------------
n = 110
alpha = 0.01
0.81 + c(-1, 1)*qnorm(1 - alpha/2)*0.34/sqrt(n)
# Slide 41 --------------------------------------------------------------------------
con = c(1.230, 0.490, 0.490, 1.080, 0.590, 0.280, 0.180, 0.100, 0.940,
1.330, 0.190, 1.160, 0.980, 0.340, 0.340, 0.190, 0.210, 0.400,
0.040, 0.830, 0.050, 0.630, 0.340, 0.750, 0.040, 0.860, 0.430,
0.044, 0.810, 0.150, 0.560, 0.840, 0.870, 0.490, 0.520, 0.250,
1.200, 0.710, 0.190, 0.410, 0.500, 0.560, 1.100, 0.650, 0.270,
0.270, 0.500, 0.770, 0.730, 0.340, 0.170, 0.160, 0.270)
n = length(con)
alpha = 0.05
mean(con) + c(-1, 1)*qnorm(1 - alpha/2)*sd(con)/sqrt(n)
# Slides 49-50 ----------------------------------------------------------------------
n = 82
x = 10
p = x/n; p
alpha = 0.05
p + c(-1, 1)*qnorm(1 - alpha/2)*sqrt(p*(1 - p)/n)
# Slides 51-52 ----------------------------------------------------------------------
n = 770
x = 412
p = x/n; p
alpha = 0.01
p + c(-1, 1)*qnorm(1 - alpha/2)*sqrt(p*(1 - p)/n)
# Slide 53 --------------------------------------------------------------------------
n = length(dados$Fuma)
x = as.numeric(table(dados$Fuma)[2])
p = x/n; p
alpha = 0.05
p + c(-1, 1)*qnorm(1 - alpha/2)*sqrt(p*(1 - p)/n)
# Slide 58 --------------------------------------------------------------------------
# Altura media
n = length(dados$Alt)
B = 10000
medb = numeric()
for (i in 1:B){
xb = sample(dados$Alt, size = n, replace = TRUE)
medb[i] = mean(xb)
}
alpha = 0.05
quantile(medb, probs = c(alpha/2, 1 - alpha/2)) # IC por bootstrap
mean(dados$Alt) + c(-1, 1)*qt(1 - alpha/2, df = n - 1)*sd(dados$Alt)/sqrt(n) # IC com suposicao de normalidade
# Proporcao de fumantes
n = length(dados$Fuma)
B = 10000
propb = numeric()
for (i in 1:B){
ind = sample(1:n, size = n, replace = TRUE)
amosb = dados$Fuma[ind]
xb = as.numeric(table(amosb)[2])
propb[i] = ifelse(is.na(xb), 0, xb/n)
}
alpha = 0.05
quantile(propb, probs = c(alpha/2, 1 - alpha/2))
# Animacao
# No link https://yihui.org/animation/example/least-squares/
# Ou pelos comandos abaixo:
library(animation)
least.squares()
least.squares(ani.type = "intercept")
# slide 9
# nivel de hidrocarboneto (%)
x = c(0.99, 1.02, 1.15, 1.29, 1.46, 1.36, 0.87, 1.23, 1.55, 1.40,
1.19, 1.15, 0.98, 1.01, 1.11, 1.20, 1.26, 1.32, 1.43, 0.95)
# pureza (%)
y = c(90.01, 89.05, 91.43, 93.74, 96.73, 94.45, 87.59, 91.77, 99.42,
93.65, 93.54, 92.52, 90.56, 89.54, 89.85, 90.39, 93.25, 93.41,
94.98, 87.33)
plot(x, y)
n = length(x)
Sx = sum(x)
Sy = sum(y)
Sxx = sum(x^2)
Sxy = sum(x*y)
b2 = (Sxy - Sx*Sy/n)/(Sxx - Sx^2/n); b2
b1 = mean(y) - b2*mean(x); b1
reg.ex1 = lm(y ~ x)
reg.ex1
abline(reg.ex1, col = "red", lty = 2)
# slide 12
data(cars)
str(cars)
attach(cars)
plot(speed, dist)
reg.ex2 = lm(dist ~ speed)
reg.ex2
abline(reg.ex2, col = "red")
#- slide 16
# dados
x = c(5.61, 8.27, 7.68, 5.05, 7.09, 7.61, 7.12, 4.13, 8.52, 9.86, 2.22, 8.70,
6.83, 6.54, 8.56, 4.01, 6.04, 8.94, 9.67, 6.37, 3.12, 9.64, 5.14, 4.15,
6.58)
y = c(43.34, 158.18, 111.18, 34.32, 89.76, 77.28, 61.17, 15.38, 174.04, 391.17,
7.14, 101.25, 73.09, 57.06, 146.79, 12.45, 38.65, 201.70, 254.11, 45.75,
7.48, 272.43, 18.73, 17.36, 72.87)
plot(x, y, ylim = c(-2, 401))
# regressao linear
reg1.lin = lm(y ~ x); reg1.lin
abline(reg1.lin, col = "red")
# regressao exponencial
ly = log(y)
reg1.exp = lm(ly ~ x); reg1.exp
alpha = exp(reg1.exp$coef[1]); alpha
beta = reg1.exp$coef[2]; beta
plot(function(x) alpha*exp(beta*x), xlim = c(2, 11), col = "blue", add = TRUE)
#- slide 17
library(MASS)
data(steam)
plot(steam$Temp, steam$Press, xlab = "Temperatura", ylab = "Pressão")
# regressao linear
reg2.lin = lm(Press ~ Temp, data = steam); reg2.lin
abline(reg2.lin, col = "red")
# regressao exponencial
reg2.exp = lm(I(log(Press)) ~ Temp, data = steam); reg2.exp
alpha = exp(reg2.exp$coef[1]); alpha
beta = reg2.exp$coef[2]; beta
plot(function(x) alpha*exp(beta*x), xlim = c(0, 110), col = "blue", add = TRUE)
#- slide 18
# dados
x = c(1.35, 2.35, 2.13, 1.15, 1.91, 2.10, 1.92, 0.80, 2.45, 2.95, 0.08, 2.51,
1.81, 1.70, 2.46, 0.75, 1.52, 2.60, 2.87, 1.64, 3.98, 9.69, 5.75, 4.88,
7.01, 9.39, 9.54, 9.16, 9.62, 8.70)
y = c(1.04, 1.36, 1.26, 1.09, 1.27, 1.22, 1.21, 0.90, 1.36, 1.32, 0.47, 1.26,
1.18, 1.17, 1.35, 0.87, 1.08, 1.37, 1.32, 1.14, 1.44, 1.90, 1.62, 1.60,
1.82, 1.96, 2.07, 1.92, 2.10, 1.91)
plot(x, y)
# regressao linear
reg3.lin = lm(y ~ x); reg3.lin
abline(reg3.lin, col = "red")
# regressao potencia
ly = log(y)
lx = log(x)
reg3.pot = lm(ly ~ lx); reg3.pot
alpha = exp(reg3.pot$coef[1]); alpha
beta = reg3.pot$coef[2]; beta
plot(function(x) alpha*x^beta, xlim = c(0, 10), col = "blue", add = TRUE)
#- slide 19
# dados
x = c(1.35, 2.35, 2.13, 1.15, 1.91, 2.10, 1.92, 0.80, 2.45, 2.95, 0.08, 2.51,
1.81, 1.70, 2.46, 0.75, 1.52, 2.60, 2.87, 1.64, 3.98, 9.69, 5.75, 4.88,
7.01, 9.39, 9.54, 9.16, 9.62, 8.70)
y = c(0.19, 0.08, 0.09, 0.12, 0.09, 0.11, 0.11, 0.25, 0.08, 0.08, 0.41, 0.10,
0.11, 0.11, 0.08, 0.35, 0.15, 0.07, 0.08, 0.13, 0.07, 0.03, 0.05, 0.05,
0.04, 0.03, 0.03, 0.03, 0.03, 0.03)
plot(x, y)
# reg linear
reg4.lin = lm(y ~ x); reg4.lin
abline(reg4.lin, col = "red")
# regressao reciproca
ry = 1/y
reg4.rec = lm(ry ~ x); reg4.rec
alpha = reg4.rec$coef[1]
beta = reg4.rec$coef[2]
plot(function(x) 1/(alpha + beta*x), xlim = c(-1, 10), col = "blue", add = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment