s2t2/R.md

## R.md

      
    Raw
  

              R.md
            
          
    Learning R

Course Prep: https://jhelvy.github.io/r4aPrimer/L0-course-prep.html
Installation

In the Cloud

Sign up for an RStudio Cloud account: https://rstudio.cloud/
How to clear the console / start over? ctrl + L
https://support.rstudio.com/hc/en-us/articles/200404846-Working-in-the-Console-in-the-RStudio-IDE
Local Development

Install R:
brew install r
Package Management

Installing:
install.packages("packagename") 
Importing:
library("packagename") 
Inspecting:
library()  # lists the installed packages
library(help = "packagename")  # shows information about one package
Invoking:
provided_func()

# or ...
packagename::provided_func()
Language Overview

https://jhelvy.github.io/r4aPrimer/L1-getting-started.html
Printing and logging

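No explicit print / log statement is needed at the console: the value of a top-level expression is printed automatically. Inside functions and loops, call print() or message() explicitly.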
Variables

Assignment:
x <- 2
Logical Operators

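Comparison operators are the same as in Python. Instead of and, or, and not, use & and | (element-wise on vectors), && and || (single values, short-circuiting, as in Ruby), and !.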
Datatypes

typeof(TRUE) #> "logical"
typeof(FALSE) #> "logical"

typeof("hello") #> "character"


typeof(3.0)  #> "double"
typeof(3)  #> "double"
typeof(3L) #> finally "integer"
Null and other special values:
typeof(Inf) #> "double"
typeof(NaN) #> "double"
typeof(NULL) #> "NULL"


Datatype conversions:
as.numeric("3.14") * 100 #> 314
as.integer(3.14) #> 3

as.character(3.14)
Datatype comparisons:
is.numeric(3.14)

!is.null(x)

COOL!

Vectors (Arrays)

x <- c(1, 2, 3)

length(x)
mean(x) 	
median(x) 	
max(x) 	
min(x) 	
sum(x) 	
prod(x) 	
Vector Comparison:
x <- c(1,2,3)
y <- c(1,2,3)
x == y #> TRUE TRUE TRUE

all(x == y) #> TRUE
String Vector Concatenation:
stringVector <- c('hello', 'world')

length(stringVector) #> 2

# INDEXES START AT 1 !?
stringVector[1] #> "hello"
stringVector[2] #> "world"

typeof(stringVector) #> "character"
is.vector(stringVector) #> TRUE

library(stringr)
y <- str_c(stringVector, collapse = ' ')
typeof(y) #> "character"
is.vector(y) #> TRUE

is.vector("HELLO") #> TRUE
Arithmetic on vectors is element-wise (not matrix multiplication; use %*% for that):
x = c(1,2,3)
y = c(10, 10, 10)

x + y #> 11, 12, 13
x * y #> 10, 20, 30
Sorting vectors (not mutating):
a = c(2, 4, 6, 3, 1, 5)

sort(a)

sort(a, decreasing = TRUE)
Data Frames

https://jhelvy.github.io/r4aPrimer/L3-data-frames.html
teams <- data.frame(
    city   = c("Boston", "New York", "New York", "New Haven"),
    name    = c("Red Sox", "Yankees", "Mets", "Ravens"),
    league  = c("major", "major", "major", "minor")
)
typeof(teams) #> list WAT
typeof(teams["city"]) #> list WAT

length(teams) #> 3
 length(teams["city"]) #> 1 WAT
Inspecting / previewing the rows:
View(teams)

head(teams)

tail(teams)

dplyr::glimpse(teams)  # glimpse() comes from the dplyr package

summary(teams)
Smarter? Data Frames:
# install.packages('dplyr')
library(dplyr)

teams <- dplyr::tibble(
  city   = c("Boston", "New York", "New York", "New Haven"),
  name    = c("Red Sox", "Yankees", "Mets", "Ravens"),
  league  = c("major", "major", "major", "minor")
)
length(teams) #> 3
teams$city #> "Boston" "New York" "New York" "New Haven"
teams$city[1] #> "Boston"
CSV Files

Create a new test CSV file called gradebook.csv, with student_id and final_grade columns:
library(here)
library(readr)

# csv_filepath = here('data', 'gradebook.csv')
csv_filepath = here('gradebook.csv')  #> "/cloud/project/gradebook.csv"
gradebook_df <- read_csv(csv_filepath)

grades = gradebook_df$final_grade
min(grades)
max(grades)
Dataviz

plot(x = gradebook_df$student_id, y = gradebook_df$final_grade)

hist(x = gradebook_df$final_grade)
 
# https://www.geeksforgeeks.org/r-bar-charts/
barplot(gradebook_df$final_grade, main="Title", xlab="X Axis", ylab="Y Axis")
Let's get real and use plotly.
# https://stackoverflow.com/questions/30711019/better-string-interpolation-in-r
# https://www.alphavantage.co/documentation/#dailyadj
# https://plotly.com/r/line-charts/

library(glue)
library(readr)
#library(dplyr)
library(plotly)

api_key <- "demo"
symbol <- "NFLX"

# READ CSV FILE

csv_filepath <- glue::glue("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={api_key}&datatype=csv")

prices_df <- read_csv(csv_filepath)

View(prices_df)

# DATAVIZ

chart_title <- glue::glue("Stock Prices ({symbol})")


#fig <- plot_ly(prices_df, x = ~timestamp, y = ~adjusted_close, type = 'scatter', mode = 'lines')

fig <- plot_ly(prices_df, x = ~timestamp, y = ~adjusted_close, 
              type = 'scatter', mode = 'lines') %>% layout(title = chart_title)

fig
Wrangling

df %>%
  filter(!is.na(height)) %>%
  group_by(state) %>%          # Here we're grouping by state
  mutate(mean_height = mean(height)) %>%
  select(state, mean_height)
Grouping / Aggregation

https://jhelvy.github.io/r4aPrimer/L4-data-wrangling.html#31_The_group_by()_function
Help!
library(here)
library(readr)
library(dplyr)
library(ggplot2)

csv_filepath = "https://raw.githubusercontent.com/prof-rossetti/intro-to-python/main/data/monthly-sales/sales-201803.csv"
sales_df <- read_csv(csv_filepath)


# DATAVIZ

sales_df$sales_price <- sales_df$"sales price"

products_pivot <- sales_df %>%
  group_by(product) %>%   
  summarise(sales_total = sum(sales_price)) %>%  # one row per product; mutate() would keep every row
  select(product, sales_total)


View(products_pivot)
Environment Variables

https://github.com/gaborcsardi/dotenv
https://stat.ethz.ch/R-manual/R-devel/library/base/html/Sys.getenv.html
Loading dotenv reads a .env file from the working directory, if one exists:
library(dotenv)
api_key <- Sys.getenv("API_KEY")
api_key #> ""

> NOTE: untested