Last active
September 6, 2019 12:11
-
-
Save BroVic/0edb59b9e21cc2febacc67e9c0c37b46 to your computer and use it in GitHub Desktop.
Quick exercise to introduce R atomic vectors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#####################
#                   #
#       Lists       #
#                   #
#####################
# Note that the function arguments used in this sample script are rather verbose
# just to enhance your understanding. You DO NOT always need to specify arguments.

## We MAY first of all create components:

# 10,000 random numbers drawn uniformly from the range -10 to 20
num_vec <- runif(n = 10000, min = -10, max = 20) # numerical vector

# identify negative values of num_vec (and label them as 'TRUE');
# a comparison already returns a logical vector, so ifelse() is not needed
num_vec_neg <- num_vec < 0 # logical vector

# make a character matrix of 4 colour names
x <- c("yellow", "red", "fuchsia", "orange")
coloured <- matrix(data = rep(x, times = 60), nrow = 10, byrow = TRUE)
# Note:
# 1. We did not specify no. of columns for the matrix
# 2. Run dim() - what do you think rep() does?

# add a row at the bottom of 'coloured' with elements called 'blank'
# (wrapping the assignment in parentheses also prints the result)
(char_mat <- rbind(coloured, rep(x = "blank", times = ncol(coloured))))

# collect names of files in this and immediate 2 parent folders
treee <- list(list.files(), list.files(path = ".."), list.files(path = "../../"))
# create a function that returns the area of a circle for a given radius
# r: numeric vector of radii; the result has one area per element of r
Circle <- function(r) {
  pi * r^2
}

# collect the definition of the function 'table()'
# (a function is an object, so it can be assigned to another name)
table_def <- table
# Download a CSV file from the web and use it to create a data frame in R
download.file(
  url = "https://download.microsoft.com/download/4/C/8/4C830C0C-101F-4BF2-8FCB-32D9A8BA906A/Import_User_Sample_en.csv",
  destfile = "microsoft_sample.csv",
  mode = "w"
) # download sample file from Microsoft website
DF <- read.csv(file = "microsoft_sample.csv") # data frame from downloaded file
# convert a character string to a raw (byte) vector, then expand it into bits
hex <- charToRaw("National Environmental Standards and Regulations Enforcement Agency")
List_in_bits <- rawToBits(hex)
# Now compile all these into a list
# (a list can hold objects of different types and sizes side by side)
mumbo_jumbo <- list(
  num_vec,
  num_vec_neg,
  coloured,
  char_mat,
  treee,
  Circle,
  table_def,
  DF,
  hex,
  List_in_bits
)
mumbo_jumbo # display the entire list in the console (if you can!)
str(mumbo_jumbo)

# is this actually a list?
typeof(mumbo_jumbo)
# Emphasize the existence of lists of lists i.e. nested lists
lister <- list(list(list(list(1)))) # Source: http://adv-r.had.co.nz/Data-structures.html
lister
str(lister)
typeof(lister)
#END
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a simple matrix, mat, for the lecture

# Use of matrix()
# A matrix of the numbers 1 - 12, with 3 rows and 4 columns. The numbers are to
# run down the columns, and not along the rows.
mat <- matrix(1:12, nrow = 3, ncol = 4, byrow = FALSE)
mat

# Use of cbind()
# Add a column to matrix, mat, using the nos 8 - 6 (i.e. backwards)
# First make a vector of the right length
colvec <- 8:6
colvec
# Then 'bind' them together
mat <- cbind(mat, colvec)
mat

# Use of rbind()
# Add a row to the bottom of matrix, mat, using nos 77 - 81
# Make a vector of the right length
rowvec <- 77:81
rowvec
# Now join this vector to update matrix, mat
mat <- rbind(mat, rowvec)
mat

# remove dimension names (names were added because of the vectors we created)
# as we don't want them for this exercise. Note that we only created the
# vectors first to make things easier for R beginners to follow.
dimnames(mat) <- NULL
mat

# NB: Actually, all of this could be done in fewer lines of code, but
# it's good to understand the logic this way!!!
# Your turn!
# 1. Create a vector with 10 random numbers between 10 and 20. Hint: ?runif()
# Give the vector any name you like
# BONUS: Set the seed to 1
set.seed(1) # fixing the seed makes the random draw reproducible
randomNum <- runif(n = 10, min = 10, max = 20)
# 2. Create a vector called firstName with 10 random first names
# from data in the babynames package. Download and install if necessary
# BONUS: Write a line of code that will enable you to check if a package
# is installed or not.

# install the package only when it is missing, rather than on every run
if (!requireNamespace("babynames", quietly = TRUE)) {
  install.packages("babynames")
}
library(babynames)
help(package = "babynames")
str(babynames)
allNames <- babynames$name
firstName <- sample(allNames, 10)

# BONUS answers: three ways of checking whether a package is installed
"babynames" %in% .packages(all.available = TRUE)
"babynames" %in% installed.packages()[, "Package"]
if (requireNamespace("babynames", quietly = TRUE)) {
  print("package is there")
}

# Alternative that does not need library():
# firstName <- sample(babynames::babynames$name, 10)
# 3. Create a vector with a sequence from 1 to 10. Call it serialNo.
serialNo <- seq_len(10)

# 4. Using the vector from step 2 above as a guide, create a character vector with elements
# "M" and "F" named gender. Thereafter convert it into a categorical variable i.e. factor
# BONUS: The factor's levels are presented alphabetically by default. Make it
# such that when you run levels(gender), the first element is "M".
firstName
# NOTE(review): these values were presumably chosen to match the names printed
# above; adjust them if your own sample of names is different.
gender <- c("F", "M", "M", "F", "M", "F", "M", "F", "M", "F")
gender <- factor(gender, levels = c("M", "F")) # explicit levels put "M" first
gender

# 5. Create a matrix called randMat with columns in this order: serialNo, firstName
# gender and the one with the random numbers
randMat <- cbind(serialNo, firstName, gender, randomNum)
randMat

# 6. Examine the matrix. What do you notice?
# BONUS: Check the type of the matrix and try carrying out arithmetic on the
# 'numerical' data
typeof(randMat)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create-vector.R
# Run the script and study the output carefully to understand what's going on

# Ex. 1: Make a vector and examine it
prac <- "Use typeof() to check what type of vector this is"
prac
typeof(prac)
str(prac)

prac <- "Use length() to check how many elements are in the vector 'prac'"
prac
length(prac)

prac <- c("Thank", "you!")
prac
length(prac)
prac[1]
prac[2]
prac[3] # indexing past the end returns NA instead of raising an error
length(prac) # Note that position 3 has no element, and does not exist

# Bonus functions: Two common, very different ways of displaying console output
print(prac)
cat(prac)
# Ex. 2: Now let's see how one can extend a vector
set.seed(1345) # for pseudo-randomness
prac <- runif(6, min = 3, max = 10) # random uniform distribution from 3 - 10
prac
length(prac) # length = 6

prac[7] <- 8.500000 # add an element just past the current end
prac
length(prac) # length = 7

prac[9] <- 6.99076 # number is on position 9; the skipped position 8 becomes NA
prac
length(prac)
prac[9]
prac[8]

prac <- append(prac, 7.89224) # append() adds elements to vectors
length(prac)
prac[10]
prac[6]

prac <- append(prac, 4.33165, after = 5) # insertion; ?append for details
prac[6]
length(prac)
str(prac)
is.numeric(prac)
is.integer(prac) # FALSE: every integer is numeric, but not every numeric is an integer
is.double(prac) # TRUE: these numeric values are stored as doubles

# Bonus functions: Are there NAs (i.e. missing values) in the result?
anyNA(prac)
is.na(prac)
which(is.na(prac))
# Ex. 3: Now let us shrink the vector
length(prac)
prac

# One method: keep only the first 7 positions
prac_short1 <- prac[1:7]
prac_short1
length(prac_short1)

# Another method: drop positions 8 to 11 using negative indices
prac
prac_short2 <- prac[-(8:11)]
prac_short2
length(prac_short2)

# Bonus function: Do both methods yield the same result?
identical(prac_short1, prac_short2)

# Finally, a small brain teaser...
prac[9:11] <- NA
prac
length(prac)
identical(prac, prac_short1)
# How would you solve this tricky condition i.e. make these 2 objects (vectors)
# exactly the same? Push your solution to your forked repository and
# create a pull request.
## END.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment