Last active
March 24, 2024 08:05
-
-
Save graebnerc/ade2efa9b3e6a31f237924d49c9b78ed to your computer and use it in GitHub Desktop.
Data Science Using R (Spring 2024) - Session 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is the script that we developed during the session on March 21, 2024

# 1. Basic commands -----------

# This is an addition:
2 + 4
4 - 9 # This is subtraction
4 / 9
3 * 9
2^3 # Exponentiation (`^` is the idiomatic spelling of `**`)

# This is
# a comment that spans
# over several lines.

# 2. Intermediate task I -----------

5 + 12
(2 * 3)^2
2 * 5.8 # Beware: decimal sign in R is '.', not ','
(8^2 + 5^4) / 3
# 3. Assignments -----------

assign("int_result", 2 + 4)
int_result <- 2 + 4 # Does the same as previous line

a1 <- 2
a1 <- 4
a1 # The first assignment got overridden by the second one
# 4. Intermediate task II -----------

a_ <- 2 + 3
b_ <- (5 * a_) / 2
c_ <- (b_ + 1)^2
d_ <- sqrt(c_) # equivalent to c_^0.5
# Hint: if you get the error "Object not found" such as here:
try(obj_x) # try() shows the error but lets the rest of the script keep running
# ...then you should always check the following:
#   1. Did you misspell the name?
#   2. Did you forget to do the assignment in the script? -> search for the name
# Also, if you get a weird error, always check to which objects the involved
# names actually point
# 5. Functions -----------

t_vec <- c(1, 2, 3, 4)

# The following four lines do the same thing:
mean(t_vec)
mean(c(1, 2, 3, 4))
mean(x = t_vec) # The name of the mandatory argument 'x' is used explicitly
mean(x = c(1, 2, 3, 4))

help(mean) # Get information about arguments

# Using optional arguments:
t_vec <- c(1, 2, 3, 4, NA)
mean(t_vec)
mean(t_vec, na.rm = FALSE)
mean(t_vec, na.rm = TRUE)
# From the fact that `mean(t_vec, na.rm = FALSE)` gives the same result as
# `mean(t_vec)` we can conclude that by default `na.rm = FALSE`.
# 6. Intermediate task III -----------

task_vector <- c(-2, 2, 4, 6, 9, NA)
median(task_vector)
median(task_vector, na.rm = TRUE)
is.na(task_vector)
anyNA(task_vector)
sum(task_vector)
sum(task_vector, na.rm = TRUE)

# If in doubt use `?` or `help()`.
# This also explains that var() computes the sample variance:
var(task_vector, na.rm = TRUE)
# To compute the population variance you can write your own function.
#
# Arguments:
#   input_vector: numeric vector of observations.
#   na.rm: if TRUE, missing values are dropped before computing. The default
#     FALSE keeps the previous behaviour: any NA makes the result NA.
# Returns the mean squared deviation from the mean, i.e. the variance with
# denominator n instead of the n - 1 used by var().
var_pop <- function(input_vector, na.rm = FALSE) {
  if (na.rm) {
    input_vector <- input_vector[!is.na(input_vector)]
  }
  mean((input_vector - mean(input_vector))^2)
}
# Or correct the result of var(). Missing values must be excluded both from
# the variance (na.rm = TRUE) and from the observation count; otherwise the
# result is NA:
n_obs <- sum(!is.na(task_vector)) # number of non-missing observations
var(task_vector, na.rm = TRUE) * (n_obs - 1) / n_obs
# 7. Defining your own functions --------------

# Add two numbers.
#   nb1, nb2: the two summands.
# Returns nb1 + nb2.
add_two_numbers <- function(nb1, nb2) {
  result <- nb1 + nb2
  return(result)
}

add_two_numbers(1, 4)
# Equivalent, but mandatory arguments set explicitly:
add_two_numbers(nb1 = 1, nb2 = 4)
# nb1 is not defined outside the function body, so this lookup fails;
# try() reports the error without stopping the script:
try(nb1)
# 8. Defining function - exercise I --------------

# Evaluate x_1^2 + 2 * x_1 * x_2 + x_2^2 for the given inputs.
#   x_1, x_2: numeric values (the arithmetic is applied element-wise).
# Returns the value of the expression.
compute_eq <- function(x_1, x_2) {
  result <- x_1^2 + 2 * x_1 * x_2 + x_2^2
  return(result)
}

compute_eq(2, 4)
# 8. Defining function - exercise II --------------

# Rescale a vector linearly so that its minimum maps to 0 and its maximum to 1.
#
# Arguments:
#   input_vector: numeric vector to rescale.
#   na.rm: if TRUE, missing values are ignored when determining the minimum
#     and maximum (they stay NA in the output). The default FALSE keeps the
#     previous behaviour: any NA makes every element of the result NA.
# Returns a vector of the same length as input_vector. If all values are
# equal the range is zero and the division yields NaN.
normalize_vec <- function(input_vector, na.rm = FALSE) {
  # range() returns c(min, max), so the minimum is only computed once.
  value_range <- range(input_vector, na.rm = na.rm)
  (input_vector - value_range[1]) / (value_range[2] - value_range[1])
}

normalize_vec(c(1, 2, 3, 4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment