Skip to content

Instantly share code, notes, and snippets.

@GeorgeOduor
Last active August 2, 2021 18:51
Show Gist options
  • Save GeorgeOduor/69f224220adcb5e2ed5f199d49410a8a to your computer and use it in GitHub Desktop.
Save GeorgeOduor/69f224220adcb5e2ed5f199d49410a8a to your computer and use it in GitHub Desktop.
---
title: "Basic Statistics"
author: "George"
date: "8/2/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Introduction
### Data
```{r libs}
suppressWarnings(suppressPackageStartupMessages(library(tidyverse)))
```
```{r data}
data(mpg)
```
### Descriptive Statistics.
```{r}
summary(mpg)
```
```{r}
names(mpg)
colnames(mpg)
```
```{r}
mpg %>% glimpse()
```
```{r}
dim(mpg)
```
```{r}
library(kableExtra) # helpers for making beautiful tables

# Plain preview of the first rows of mpg.
head(mpg)

# The same rows rendered as a styled kable table.
# For a table that does not span the full page width, use
# kable_styling(full_width = FALSE) as the last step instead.
mpg %>%
  head() %>%
  kable() %>%
  kable_styling()
```
### Univariate Analysis
Which manufacturer has the highest number of cars?
```{r}
# Count the cars per manufacturer (largest first, count column named
# "Count") and draw the counts as horizontal bars ordered by size.
mpg %>%
  count(manufacturer, sort = TRUE, name = "Count") %>%
  ggplot(aes(x = reorder(manufacturer, Count), y = Count)) +
  geom_col() +
  coord_flip()
```
What is the model of the latest car? Which company produced it?
```{r}
# Keep only the cars from the most recent model year in the data.
subset1 <- mpg[mpg$year == max(mpg$year), ]

# Answer the question above: list every distinct manufacturer/model pair
# found in that latest model year. (subset1 was previously computed but
# never used, so the chunk produced no answer.)
unique(subset1[, c("manufacturer", "model")])

# vcd is loaded here because the following chunks use its Arthritis data.
library(vcd)
```
```{r}
# One-way frequency table of the Improved column of Arthritis.
mytable <- table(Arthritis[["Improved"]])
mytable
```
```{r}
# Express the counts in mytable as percentages, rounded to two decimals.
prop <- round(100 * prop.table(mytable), digits = 2)
prop
```
```{r}
# Two-way frequency table: Sex in the rows, Improved in the columns.
mytable2 <- table(Arthritis[["Sex"]], Arthritis[["Improved"]])
mytable2
```
```{r propotions}
# props
```
```{r}
xtabs(~ Sex+Improved,data = Arthritis)
```
```{r}
addmargins(mytable2)
```
```{r}
```
# List the names of all objects in the current environment.
ls()
# Remove every object that ls() returns, i.e. empty the environment.
# NOTE(review): this also wipes the workspace of whoever sources this
# script - confirm that is intended.
rm(list = ls())
# Dataframe
# Data Visualisations
library(tidyverse)
# ggplot syntax
data("mtcars")
# Bar chart of the number of cars for each cylinder count in mtcars.
mtcars %>%
  # Readable axis labels such as "4 cylinders".
  mutate(cyl = paste(cyl, "cylinders")) %>%
  ggplot(aes(x = cyl)) +
  # The fill is set outside aes(): inside aes() the string "red" is mapped
  # as a data value, so the bars get the default palette colour, not red.
  geom_bar(fill = "red") +
  labs(
    title = "Mtcars Cylinders",
    x = "Cylinders",
    y = "Count",
    subtitle = "Nigeria",
    caption = "Source:Analytics Department 2021"
  ) +
  theme_gray() +
  # Text sizes are numeric (points); they were previously passed as strings.
  theme(
    plot.title = element_text(colour = "orange", size = 15, hjust = 0.5),
    plot.subtitle = element_text(colour = "orange", size = 10, hjust = 0.5),
    plot.caption = element_text(face = "italic", hjust = 1, size = 5),
    legend.position = "none"
  )
# Distribution chart: histogram of mpg in 20 bins, filled by the vs column
# (as text) and split into one panel per value of cyl.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(aes(fill = as.character(vs)), bins = 20) +
  facet_grid(cols = vars(cyl))
# Correlation: scatter plot of wt against drat, coloured by the vs column
# (as text), with one panel per value of cyl.
ggplot(mtcars, aes(x = drat, y = wt)) +
  geom_point(aes(colour = as.character(vs))) +
  facet_wrap(vars(cyl))
# Scalars
age <- 29
height <- 1.7
age.height <- age / height

# Vectors: all elements share a single type.
student1 <- c(age, height)
student2 <- c(78, 1.9)
# Combining numbers with strings turns every element into a string.
large_vector <- c(student1, student2, "Nairobi", "Kampala")

# List: each element keeps its own type.
large_vector2 <- list(student1, student2, "Nairobi", "Kampala")

# Accessing items
large_vector2[[c(1, 1)]] # first value inside the list's first element
large_vector[1] # first element of the vector
# data types
# Numeric data types
# Character
# boolean
# Date
# Coercing
typeof(large_vector)
typeof(large_vector2)
# Basic string operations
toupper("George")
tolower("George")
paste("My name is","George",sep = "_")
paste0("My name is","George")
sprintf("Working with str %s",3445345)
#Control structures
# comparison
2 == 3
"hello" != "hell"
23 < 56
# Report how age compares with 29. Equality has its own branch: without it
# an age of exactly 29 was reported as "greater than 29".
if (age < 29) {
  print("Age is less than 29")
} else if (age > 29) {
  print("Age is greater than 29")
} else {
  print("Age is exactly 29")
}
num <- 12

# Two independent checks: both messages print when num is above 10.
if (num > 5) {
  print("Bigger than 5")
}
if (num > 10) {
  print("Bigger than 10")
}

# Label num by the largest threshold it exceeds. The thresholds are tested
# from largest to smallest; in the previous nested ifelse() the "> 6" test
# sat behind "> 5" and could never be reached, and values of 5 or less were
# still labelled "Bigger than 5". Plain if/else is used because num is a
# single value, not a vector.
myvar <- if (num > 6) {
  "Bigger than 6"
} else if (num > 5) {
  "Bigger than 5"
} else {
  "Not bigger than 5"
}
# Print each element of student1 on its own line.
for (i in student1) print(i)
# Loop demo: x starts at the limit, so the message is printed once; adding
# 10 then takes x past 56 and the loop ends.
x <- 56
repeat {
  if (x > 56) break
  print("ddsff")
  x <- x + 10
}
# NOTE(review): this detaches the attached packages before the two needed
# below are loaded again - confirm the reset is intended.
golem::detach_all_attached()
library(tidyverse)
library(openxlsx)

# Files and directories ----
# Create the output directory only when it is missing, so reruns do not
# warn. Paths are built explicitly instead of changing the working
# directory with setwd().
out_dir <- "E:/Classes"
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
dir.exists(out_dir)
xlsx_path <- file.path(out_dir, "EuStockMarkets.xlsx")

# Write an Excel file ----
# EuStockMarkets is a multivariate time series (a matrix), which mutate()
# has no method for, so it is converted to a data frame first.
EuStockMarkets <- EuStockMarkets %>%
  as.data.frame() %>%
  mutate(savedate = Sys.Date())
write.xlsx(x = EuStockMarkets, file = xlsx_path)

# Read the file back, first with openxlsx (detecting dates), then with
# readxl.
from_folder <- read.xlsx(xlsx_path, detectDates = TRUE)
readxl::read_excel(xlsx_path)
#' Interactive three-operation calculator
#'
#' Shows a menu, asks for an operation (1 = add, 2 = subtract,
#' 3 = multiply) and two values, and returns a sentence describing the
#' result. Invalid input is reported and the menu is shown again, instead of
#' failing on an undefined `result` or on an `NA` comparison.
#'
#' @param read_input Function that takes a prompt string and returns the
#'   reply as a string. Defaults to `readline`; pass another function to
#'   drive the calculator from a script or a test.
#' @param max_attempts How many times the menu is shown before giving up,
#'   so a session that cannot supply input does not loop forever.
#' @return A single character string describing the calculation.
my_simple_calc <- function(read_input = readline, max_attempts = 3) {
  menu <- paste0(
    "What do u want to do ?\n",
    "Select :\n",
    "1. to add\n",
    "2. to subtract\n",
    "3. to multiply\n"
  )

  # Read one reply as a number; text that is not a number becomes NA
  # (the coercion warning is expected here and therefore silenced).
  ask_number <- function(prompt) {
    suppressWarnings(as.numeric(read_input(prompt)))
  }

  for (attempt in seq_len(max_attempts)) {
    cat(menu)

    selection <- ask_number("Selection:")
    if (is.na(selection) || !(selection %in% 1:3)) {
      message("Please enter 1, 2 or 3.")
      next
    }

    num1 <- ask_number("Value 1:")
    num2 <- ask_number("Value 2:")
    if (is.na(num1) || is.na(num2)) {
      message("Both values must be numbers.")
      next
    }

    result <- switch(
      as.character(selection),
      "1" = paste("The sum of ", num1, "and", num2, "is", num1 + num2),
      "2" = paste(
        "The difference between ", num1, "and", num2, "is", num1 - num2
      ),
      "3" = paste("The product of ", num1, "and", num2, "is", num1 * num2)
    )
    return(result)
  }

  stop("No valid input after ", max_attempts, " attempts.", call. = FALSE)
}
my_simple_calc()
# DEBUGGING
# create a simple calculator simulation
# Dataframe
# rectangular format
# - rows (observations)
# - columns -> variables: measurable quantities (numeric, categorical, ordinal)
# A small two-row data frame built by hand: one vector per column.
sampledataframe <- data.frame(
  fname = c("George", "Oduor"),
  lname = c("William", "Jane")
)
# inbuilt dataframes
mtcars
# basic operations
# shape
dim(mtcars)
# colnames
names(mtcars)
colnames(mtcars)
rownames(mtcars)
# Filter rows
# Cars with more than 200 horsepower.
hp_above_200 <- subset(mtcars, hp > 200)
# Cars with more than 100 horsepower and fewer than 6 cylinders.
hp_above_100_less_6 <- subset(mtcars, hp > 100 & cyl < 6)
library(tidyverse)
# Convert to a tibble, moving the row names into a column called "Cars".
mtcars <- as_tibble(mtcars, rownames = "Cars")

# Selecting columns
colnames(mtcars)
# Keep four named columns.
x_m_c <- mtcars %>% select(Cars, mpg, disp, gear)
# Keep everything except gear.
no_gears <- mtcars %>% select(-gear)
# mutate
# New columns are created in order, so mpg_gear already uses the relabelled
# gear column. The result is only printed, not stored.
x_m_c %>%
  mutate(
    gear = paste(gear, "gears"),
    dispsq = disp^2,
    disp_mpg = disp + mpg,
    mpg_gear = paste(mpg, gear, sep = "_")
  )

# Base R way of adding a column; this one is stored in x_m_c.
x_m_c[["dispsq"]] <- x_m_c[["disp"]]^2
# mutate_at(x_m_c, .vars = c('mpg', 'disp'), .funs = function(x) sqrt(x))
x_m_c

# A constant expression is repeated for every row.
x_m_c %>% mutate(test = 5 + 5)

# Add a column, then keep cars above 20 mpg whose name does not contain
# "Toyota" (case-insensitive).
x_m_c %>%
  mutate(test = 5 + mpg) %>%
  filter(mpg > 20, !grepl("Toyota", Cars, ignore.case = TRUE))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment