# BroVic/create-list.R

## create-list.R
#####################
#                   #
#      Lists        #
#                   #
#####################
# Note that the function arguments used in this sample script are rather verbose
# just to enhance your understanding. You DO NOT always need to specify arguments.

## We MAY first of all create components:

# 10,000 random numbers drawn uniformly from between -10 and 20
# (runif() does not normally return the two end points themselves)
num_vec <- runif(n = 10000, min = -10, max = 20) # numerical vector

# flag the negative values of num_vec: TRUE where negative, FALSE otherwise.
# A comparison already yields a logical vector, so wrapping it in
# ifelse(test, TRUE, FALSE) adds nothing.
num_vec_neg <- num_vec < 0 # logical vector

# Build a character matrix out of 4 colour names.
x <- c("yellow", "red", "fuchsia", "orange")
# Repeating the 4 names 60 times gives 240 values. Only the number of rows
# (10) is supplied, so matrix() works out the number of columns by itself.
# Exercise: run dim(coloured), then look at what rep() returns on its own.
coloured <- matrix(rep(x, 60), nrow = 10, byrow = TRUE)

# Append one extra row, made up entirely of the word 'blank', underneath
# 'coloured'. The outer parentheses make R print the result as well.
(char_mat <- rbind(coloured, rep("blank", ncol(coloured))))

# collect the file names found in the working directory, in its parent and in
# its grandparent folder - one character vector per folder, in that order
treee <- lapply(c(".", "..", "../../"), list.files)

# Circle(): area of a circle for a given radius
#   r       numeric vector of radii
#   returns a double vector of the same length holding pi * r^2
Circle <- function(r) {
  # r^2 is always computed in double precision, so a large integer radius
  # (e.g. 50000L) no longer overflows to NA as the integer product r * r does
  pi * r^2
}

# keep a copy of the function object behind 'table()' under a new name
table_def <- table

# Fetch a sample CSV file from the Microsoft download site, save it in the
# working directory, then read the saved copy into a data frame
download.file(
  "https://download.microsoft.com/download/4/C/8/4C830C0C-101F-4BF2-8FCB-32D9A8BA906A/Import_User_Sample_en.csv",
  "microsoft_sample.csv",
  mode = "w"
)
DF <- read.csv("microsoft_sample.csv") # data frame from the downloaded file

# turn the agency's name into a raw vector holding the bytes of its characters
hex <- charToRaw(x = "National Environmental Standards and Regulations Enforcement Agency")

# expand every byte of 'hex' into its 8 individual bits
List_in_bits <- rawToBits(x = hex)

# Now bundle every object created so far into a single list of 10 components
mumbo_jumbo <- list(
  num_vec, num_vec_neg,
  coloured, char_mat,
  treee,
  Circle, table_def,
  DF,
  hex, List_in_bits
)
mumbo_jumbo     # display the entire list in the console (if you can!)

# a compact overview of the structure of every component
str(mumbo_jumbo)

# is this actually a list?
typeof(mumbo_jumbo)


# Lists can hold other lists, i.e. they can be nested. Here the number 1 ends
# up inside a list that is four levels deep, built one level at a time.
# Source: http://adv-r.had.co.nz/Data-structures.html
lister <- list(1)
lister <- list(lister)
lister <- list(lister)
lister <- list(lister)
lister
str(lister)
typeof(lister)
#END

## create-matrix.R
# Create a simple matrix, mat, for the lecture

# Use of matrix()
# A matrix holding the numbers 1 - 12 in 3 rows and 4 columns. The numbers
# fill the matrix column by column (the default), not row by row.
mat <- matrix(seq_len(12), nrow = 3, ncol = 4)
mat

# Use of cbind()
# Add a column to the matrix, mat, using the numbers 8 down to 6.
# First make a vector with one value per row ...
colvec <- rev(6:8)
colvec
# ... then 'bind' it to the matrix as a new column
mat <- cbind(mat, colvec)
mat

# Use of rbind()
# Add a row to the bottom of the matrix, mat, using the numbers 77 - 81.
# Make a vector with one value per column ...
rowvec <- seq(from = 77L, to = 81L)
rowvec
# ... then join it to the matrix as a new row
mat <- rbind(mat, rowvec)
mat

# Remove the dimension names. cbind() and rbind() labelled the new column and
# row with the names of the vectors, and we don't want labels for this
# exercise. The vectors were only created first to make each step easy to
# follow for R beginners.
dimnames(mat) <- NULL
mat

# NB: All of this could be done in fewer lines of code, but it's good to
# understand the logic this way!!!

# Your turn!
# 1. Create a vector with 10 random numbers between 10 and 20. Hint: ?runif()
# Give the vector any name you like
# BONUS: Set the seed to 1
# Fixing the seed first makes the 'random' numbers the same on every run.
set.seed(seed = 1)
randomNum <- runif(n = 10, min = 10, max = 20)

# 2. Create a vector called firstName with 10 random first names
# from data in the babynames package. Download and install if necessary
# BONUS: Write a line of code that will enable you to check if a package
# is installed or not.

# Install the package only when it is missing, rather than downloading and
# re-installing it every time the script is run.
if (!requireNamespace("babynames", quietly = TRUE)) {
  install.packages("babynames")
}
library(babynames)
help(package = "babynames")
str(babynames)
allNames <- babynames$name
firstName <- sample(allNames, 10)

# BONUS answer: three ways of checking whether a package is installed
"babynames" %in% .packages(all.available = TRUE)
"babynames" %in% installed.packages()[, "Package"]
if (requireNamespace("babynames")) print("package is there")

# firstName <- sample(babynames::babynames$name, 10)


# 3. Create a vector with a sequence from 1 to 10. Call it serialNo.
serialNo <- seq_len(length.out = 10)

# 4. Using the vector from step 2 above as a guide, create a character vector
# with elements "M" and "F" named gender. Thereafter convert it into a
# categorical variable i.e. factor
# BONUS: The factor's levels are presented alphabetically by default. Make it
# such that when you run levels(gender), the first element is "M".
firstName
# Listing the levels explicitly fixes their order: "M" first, then "F".
gender <- factor(
  c("F", "M", "M", "F", "M", "F", "M", "F", "M", "F"),
  levels = c("M", "F")
)
gender

# 5. Create a matrix called randMat with columns in this order: serialNo,
# firstName, gender and the one with the random numbers
randMat <- cbind(
  serialNo = serialNo,
  firstName = firstName,
  gender = gender,
  randomNum = randomNum
)
randMat

# 6. Examine the matrix. What do you notice?
# BONUS: Check the type of the matrix and try carrying out arithmetic on the
# 'numerical' data
# Hint: a matrix can hold only one type of data, so look closely at how the
# numbers (and the factor) are displayed.
typeof(randMat)

## create-vector.R
# create-vector.R

# Run the script and study the output carefully to understand what's going on

# Ex. 1: Make a vector and examine it
# A single quoted string is itself a vector: a character vector of length 1.
prac <- "Use typeof() to check what type of vector this is"
prac
typeof(prac)          # the storage type of the vector
str(prac)             # compact summary: type plus contents

# However long the text is, one string still counts as one element.
prac <- "Use length() to check how many elements are in the vector 'prac'"
prac
length(prac)

# c() combines separate values into one vector - here, two elements.
prac <- c("Thank", "you!")
prac
length(prac)

# Square brackets pick out individual elements by their position.
prac[1]
prac[2]
prac[3]               # past the end of the vector, so NA is returned
length(prac)          # Note that position 3 has no element, and does not exist

# Bonus functions: Two common, very different ways of displaying console output
print(prac)           # shows the elements in quotes, with an index prefix
cat(prac)             # writes the bare text, elements separated by a space

# Ex. 2: Now let's see how one can extend a vector
set.seed(1345)                      # for pseudo-randomness
prac <- runif(6, min = 3, max = 10) # random uniform distribution from 3 - 10
prac

length(prac)                        # length = 6
prac[7] <- 8.500000                 # assign to the position just past the end
prac
length(prac)                        # length = 7

# Assigning to position 9 skips position 8, which R fills in with NA.
prac[9] <- 6.99076                  # number is on position 9
prac
length(prac)                        # length = 9
prac[9]
prac[8]                             # NA - nothing was ever stored here

prac <- append(prac, 7.89224)       # append() adds elements to vectors
length(prac)                        # length = 10
prac[10]

prac[6]                                     # the value before the insertion
prac <- append(prac, 4.33165, after = 5)    # insertion; ?append for details
prac[6]                                     # now the inserted 4.33165
length(prac)                                # length = 11

str(prac)
is.numeric(prac)
is.integer(prac)                            # integers are numeric, not vice versa
is.double(prac)                             # these numbers are stored as doubles

# Bonus functions: Are there NAs (i.e. missing values) in the result?
anyNA(prac)                         # a single TRUE/FALSE for the whole vector
is.na(prac)                         # TRUE/FALSE for each element
which(is.na(prac))                  # position(s) of the missing value(s)


# Ex. 3: Now let us shrink the vector
length(prac)
prac

# One method: keep the wanted positions
prac_short1 <- prac[1:7]
prac_short1
length(prac_short1)

# Another method: drop the unwanted positions with a negative index
prac
prac_short2 <- prac[-c(8:11)]
prac_short2
length(prac_short2)

# Bonus function: Do both methods yield the same result?
identical(prac_short1, prac_short2)

# Finally, a small brain teaser...
# Positions 9 to 11 are overwritten with NA (missing values).
prac[9:11] <- NA
prac
length(prac)
identical(prac, prac_short1)
# How would you solve this tricky condition, i.e. make these 2 objects
# (vectors) exactly the same? Push your solution to your forked repository
# and create a pull request.

## END.
	#####################
	#                   #
	#      Lists        #
	#                   #
	#####################
	# Note that the function arguments used in this sample script are rather verbose
	# just to enhance your understanding. You DO NOT always need to specify arguments.

	## We MAY first of all create components:

	# 10,000 random numbers drawn uniformly from between -10 and 20
	# (runif() does not normally return the two end points themselves)
	num_vec <- runif(n = 10000, min = -10, max = 20) # numerical vector

	# flag the negative values of num_vec: TRUE where negative, FALSE otherwise.
	# A comparison already yields a logical vector, so wrapping it in
	# ifelse(test, TRUE, FALSE) adds nothing.
	num_vec_neg <- num_vec < 0 # logical vector

	# Build a character matrix out of 4 colour names.
	x <- c("yellow", "red", "fuchsia", "orange")
	# Repeating the 4 names 60 times gives 240 values. Only the number of rows
	# (10) is supplied, so matrix() works out the number of columns by itself.
	# Exercise: run dim(coloured), then look at what rep() returns on its own.
	coloured <- matrix(rep(x, 60), nrow = 10, byrow = TRUE)

	# Append one extra row, made up entirely of the word 'blank', underneath
	# 'coloured'. The outer parentheses make R print the result as well.
	(char_mat <- rbind(coloured, rep("blank", ncol(coloured))))

	# collect the file names found in the working directory, in its parent and
	# in its grandparent folder - one character vector per folder, in that order
	treee <- lapply(c(".", "..", "../../"), list.files)

	# Circle(): area of a circle for a given radius
	#   r       numeric vector of radii
	#   returns a double vector of the same length holding pi * r^2
	Circle <- function(r) {
	  # r^2 is always computed in double precision, so a large integer radius
	  # (e.g. 50000L) no longer overflows to NA as the integer product r * r does
	  pi * r^2
	}

	# keep a copy of the function object behind 'table()' under a new name
	table_def <- table

	# Fetch a sample CSV file from the Microsoft download site, save it in the
	# working directory, then read the saved copy into a data frame
	download.file(
	  "https://download.microsoft.com/download/4/C/8/4C830C0C-101F-4BF2-8FCB-32D9A8BA906A/Import_User_Sample_en.csv",
	  "microsoft_sample.csv",
	  mode = "w"
	)
	DF <- read.csv("microsoft_sample.csv") # data frame from the downloaded file

	# turn the agency's name into a raw vector holding the bytes of its characters
	hex <- charToRaw(x = "National Environmental Standards and Regulations Enforcement Agency")

	# expand every byte of 'hex' into its 8 individual bits
	List_in_bits <- rawToBits(x = hex)

	# Now bundle every object created so far into a single list of 10 components
	mumbo_jumbo <- list(
	  num_vec, num_vec_neg,
	  coloured, char_mat,
	  treee,
	  Circle, table_def,
	  DF,
	  hex, List_in_bits
	)
	mumbo_jumbo # display the entire list in the console (if you can!)

	# a compact overview of the structure of every component
	str(mumbo_jumbo)

	# is this actually a list?
	typeof(mumbo_jumbo)


	# Lists can hold other lists, i.e. they can be nested. Here the number 1 ends
	# up inside a list that is four levels deep, built one level at a time.
	# Source: http://adv-r.had.co.nz/Data-structures.html
	lister <- list(1)
	lister <- list(lister)
	lister <- list(lister)
	lister <- list(lister)
	lister
	str(lister)
	typeof(lister)
	#END
	# Create a simple matrix, mat, for the lecture

	# Use of matrix()
	# A matrix holding the numbers 1 - 12 in 3 rows and 4 columns. The numbers
	# fill the matrix column by column (the default), not row by row.
	mat <- matrix(seq_len(12), nrow = 3, ncol = 4)
	mat

	# Use of cbind()
	# Add a column to the matrix, mat, using the numbers 8 down to 6.
	# First make a vector with one value per row ...
	colvec <- rev(6:8)
	colvec
	# ... then 'bind' it to the matrix as a new column
	mat <- cbind(mat, colvec)
	mat

	# Use of rbind()
	# Add a row to the bottom of the matrix, mat, using the numbers 77 - 81.
	# Make a vector with one value per column ...
	rowvec <- seq(from = 77L, to = 81L)
	rowvec
	# ... then join it to the matrix as a new row
	mat <- rbind(mat, rowvec)
	mat

	# Remove the dimension names. cbind() and rbind() labelled the new column and
	# row with the names of the vectors, and we don't want labels for this
	# exercise. The vectors were only created first to make each step easy to
	# follow for R beginners.
	dimnames(mat) <- NULL
	mat

	# NB: All of this could be done in fewer lines of code, but it's good to
	# understand the logic this way!!!

	# Your turn!
	# 1. Create a vector with 10 random numbers between 10 and 20. Hint: ?runif()
	# Give the vector any name you like
	# BONUS: Set the seed to 1
	# Fixing the seed first makes the 'random' numbers the same on every run.
	set.seed(seed = 1)
	randomNum <- runif(n = 10, min = 10, max = 20)

	# 2. Create a vector called firstName with 10 random first names
	# from data in the babynames package. Download and install if necessary
	# BONUS: Write a line of code that will enable you to check if a package
	# is installed or not.

	# Install the package only when it is missing, rather than downloading and
	# re-installing it every time the script is run.
	if (!requireNamespace("babynames", quietly = TRUE)) {
	  install.packages("babynames")
	}
	library(babynames)
	help(package = "babynames")
	str(babynames)
	allNames <- babynames$name
	firstName <- sample(allNames, 10)

	# BONUS answer: three ways of checking whether a package is installed
	"babynames" %in% .packages(all.available = TRUE)
	"babynames" %in% installed.packages()[, "Package"]
	if (requireNamespace("babynames")) print("package is there")

	# firstName <- sample(babynames::babynames$name, 10)


	# 3. Create a vector with a sequence from 1 to 10. Call it serialNo.
	serialNo <- seq_len(length.out = 10)

	# 4. Using the vector from step 2 above as a guide, create a character vector
	# with elements "M" and "F" named gender. Thereafter convert it into a
	# categorical variable i.e. factor
	# BONUS: The factor's levels are presented alphabetically by default. Make it
	# such that when you run levels(gender), the first element is "M".
	firstName
	# Listing the levels explicitly fixes their order: "M" first, then "F".
	gender <- factor(
	  c("F", "M", "M", "F", "M", "F", "M", "F", "M", "F"),
	  levels = c("M", "F")
	)
	gender

	# 5. Create a matrix called randMat with columns in this order: serialNo,
	# firstName, gender and the one with the random numbers
	randMat <- cbind(
	  serialNo = serialNo,
	  firstName = firstName,
	  gender = gender,
	  randomNum = randomNum
	)
	randMat

	# 6. Examine the matrix. What do you notice?
	# BONUS: Check the type of the matrix and try carrying out arithmetic on the
	# 'numerical' data
	# Hint: a matrix can hold only one type of data, so look closely at how the
	# numbers (and the factor) are displayed.
	typeof(randMat)
	# create-vector.R

	# Run the script and study the output carefully to understand what's going on

	# Ex. 1: Make a vector and examine it
	# A single quoted string is itself a vector: a character vector of length 1.
	prac <- "Use typeof() to check what type of vector this is"
	prac
	typeof(prac) # the storage type of the vector
	str(prac) # compact summary: type plus contents

	# However long the text is, one string still counts as one element.
	prac <- "Use length() to check how many elements are in the vector 'prac'"
	prac
	length(prac)

	# c() combines separate values into one vector - here, two elements.
	prac <- c("Thank", "you!")
	prac
	length(prac)

	# Square brackets pick out individual elements by their position.
	prac[1]
	prac[2]
	prac[3] # past the end of the vector, so NA is returned
	length(prac) # Note that position 3 has no element, and does not exist

	# Bonus functions: Two common, very different ways of displaying console output
	print(prac) # shows the elements in quotes, with an index prefix
	cat(prac) # writes the bare text, elements separated by a space

	# Ex. 2: Now let's see how one can extend a vector
	set.seed(1345) # for pseudo-randomness
	prac <- runif(6, min = 3, max = 10) # random uniform distribution from 3 - 10
	prac

	length(prac) # length = 6
	prac[7] <- 8.500000 # assign to the position just past the end
	prac
	length(prac) # length = 7

	# Assigning to position 9 skips position 8, which R fills in with NA.
	prac[9] <- 6.99076 # number is on position 9
	prac
	length(prac) # length = 9
	prac[9]
	prac[8] # NA - nothing was ever stored here

	prac <- append(prac, 7.89224) # append() adds elements to vectors
	length(prac) # length = 10
	prac[10]

	prac[6] # the value before the insertion
	prac <- append(prac, 4.33165, after = 5) # insertion; ?append for details
	prac[6] # now the inserted 4.33165
	length(prac) # length = 11

	str(prac)
	is.numeric(prac)
	is.integer(prac) # integers are numeric, not vice versa
	is.double(prac) # these numbers are stored as doubles

	# Bonus functions: Are there NAs (i.e. missing values) in the result?
	anyNA(prac) # a single TRUE/FALSE for the whole vector
	is.na(prac) # TRUE/FALSE for each element
	which(is.na(prac)) # position(s) of the missing value(s)


	# Ex. 3: Now let us shrink the vector
	length(prac)
	prac

	# One method: keep the wanted positions
	prac_short1 <- prac[1:7]
	prac_short1
	length(prac_short1)

	# Another method: drop the unwanted positions with a negative index
	prac
	prac_short2 <- prac[-c(8:11)]
	prac_short2
	length(prac_short2)

	# Bonus function: Do both methods yield the same result?
	identical(prac_short1, prac_short2)

	# Finally, a small brain teaser...
	# Positions 9 to 11 are overwritten with NA (missing values).
	prac[9:11] <- NA
	prac
	length(prac)
	identical(prac, prac_short1)
	# How would you solve this tricky condition, i.e. make these 2 objects
	# (vectors) exactly the same? Push your solution to your forked repository
	# and create a pull request.

	## END.