Created
October 6, 2022 12:33
-
-
Save graebnerc/53a88869a8d178efea90fff92ab272f5 to your computer and use it in GitHub Desktop.
Lecture notes and solutions to the exercises of session 4 in the fall semester 2022
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Lecture notes and solutions to the exercises of session 4 in the fall semester 2022 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lecture notes session 4
# Factors------------------------------
# Creating a factor.
# `levels` lists every admissible category, so it may contain values
# (here "diverse") that do not occur in the data itself.
expl_fac <- factor(
  c("male", "male", "female"),
  levels = c("male", "female", "diverse")
)
# Print the factor: shows the three values and all three levels
expl_fac
# What happens if you create a factor that contains
# elements not contained in the levels-vector of the factor?
# Answer: every element that is not among the levels becomes NA.
element_vec <- c("male", "male", "female")
level_vec <- c("female")
test_fac <- factor(
  x = element_vec,
  levels = level_vec
)
test_fac
# Test for factors:
# typeof() reports the underlying storage type, which is "integer" for a
# factor, so it cannot be used to recognise factors.
typeof(test_fac) # Beware!!
# is.factor() is the reliable test and returns TRUE here
is.factor(test_fac)
# Digression: types of measurement-----
# Make sure you recap the four levels of measurement:
# Nominal: the data can only be categorized
# Example: Gender, city of birth,...
# Ordinal: the data can be categorized and ranked
# Example: Language skill (A1, A2,...), responses to Likert scales,...
# Interval: the data can be categorized, ranked, and evenly spaced
# Example: Temperature (Celsius, Fahrenheit), IQ scales, ...
# Ratio: the data can be categorized, ranked, evenly spaced, and has a natural zero.
# Example: Temperature (Kelvin), age, height, income,...
# To store nominal data we usually use the type character or (unordered) factor
# To store ordinal data we use an ordered factor
# To store interval or ratio data we use doubles or integers
# Here is how you create an ordered factor:
ordered_factor <- factor(
  x = c("good", "okay", "bad", "good"),
  ordered = TRUE
)
# Without explicit levels, the levels are the sorted unique values of `x`,
# which gives the order bad < good < okay.
ordered_factor # The sequence is strange, though...
# To change it specify the levels explicitly in the right sequence:
ordered_factor_right <- factor(
  x = c("good", "okay", "bad", "good"),
  levels = c("bad", "okay", "good"),
  ordered = TRUE
)
# Now the ranking is bad < okay < good
ordered_factor_right
# data.frames--------------------------
# A plain list may hold elements of different lengths (4 and 3 here).
# The same input passed to data.frame() would raise an error, because all
# columns of a data.frame must have the same number of rows.
l_1 <- list(
  column_1 = c(1, 2, 3, 4),
  column_2 = c("a", "b", "c")
)
# A data.frame with two columns of equal length (3 rows)
df_1 <- data.frame(
  column_1 = c(1, 2, 3),
  column_2 = c("a", "b", "c")
)
df_1
# A larger example: 50 rows, a running number and a constant string
df_2 <- data.frame(
  nb = seq(1, 50),
  ch = rep("Hallo", 50)
)
# str() gives a compact overview of the dimensions and column types
str(df_2)
# Columns are extracted as vectors with `$`; from R 4.0 on, strings stay
# character by default (stringsAsFactors = FALSE)
typeof(df_2$ch)
# Tibble
# Requires the tibble package to be installed
library(tibble)
# Print the data.frame first, for comparison with the tibble below
df_1
# Convert the existing data.frame into a tibble
tib_1 <- tibble::as_tibble(df_1)
tib_1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment