huonw/bash-tutorial.sh

## bash-tutorial.sh
#!/bin/bash
# (The directive above is called a "shebang": https://en.wikipedia.org/wiki/Shebang_(Unix) )

# Helper function that prints each of the arguments (functions can be
# declared with `function <name> { ... }` or `<name>() { ... }`)
printArguments() {
  # Prints every argument on its own line as "<position>: <argument>",
  # numbering from 1. Prints nothing when called without arguments.
  #
  # arguments come in as the variables $1, $2, $3, ...
  #
  # `local` keeps the counter private to this function; without it the
  # assignment would overwrite any script-level variable named `count`.
  local count=1
  # (( ... )) does numeric tasks: (( $# )) is true while arguments remain
  while (( $# )); do
    echo "$count: $1"
    # shift removes $1, and moves $2 to $1, $3 to $2, etc.
    shift

    (( count++ ))
  done
}

# Helper function that prints the number of arguments
countArguments() {
  # Report how many arguments were received, in the form "<N> arguments".
  printf '%d arguments\n' "$#"
}

# Helper functions that just manage the exercises
quitOnIncorrect=true
currentExercise=
startExercise() {
  # Records the exercise id ($1) in the global currentExercise, which
  # checkAnswer reads when reporting, and prints a banner with the id
  # and the exercise title ($2).
  currentExercise=$1
  printf '## Exercise %s - %s ##\n' "$1" "$2"
}
checkAnswer() {
  # Compares the found value ($1) with the expected value ($2) as strings
  # and reports the outcome for the exercise named in $currentExercise.
  # On a mismatch, the command stored in $quitOnIncorrect is run; if it
  # succeeds, the script exits with status 1.
  if [[ "$1" != "$2" ]]; then
    printf "incorrect answer for '%s':\n  found: %s\n  expected: %s\n" \
      "$currentExercise" "$1" "$2"
    if "$quitOnIncorrect"; then
      exit 1
    fi
    return 0
  fi

  printf "correct answer for '%s'!\n" "$currentExercise"
}


# Bash is a fancy way to invoke programs.

## "Programming"

# It supports things you might recognise as "functions" and
# "variables" and even "arrays", but the behaviour of them can be
# weeeeeeird.

# The most useful tools for bashing are:
#
# - `help something` for builtins
# - `man something` for external programs
# - https://www.tldp.org/LDP/abs/html/parameter-substitution.html


### Variables and expansion

# Bash variables are defined with <name>=<value>, with no spaces before or after the =:
some_variable=the_value

# They are used with $:

echo "passing some_variable to the functions"
printArguments $some_variable
countArguments $some_variable

# They are strings, whether or not they are declared with " or '


startExercise "1a" "Declaring variables"

# What if we want a variable with some text? The variable needs to be
# a string with the three words delta, echo and foxtrot in it. Here's
# a start:
text=delta echo foxtrot # FIXME

echo "The 'text' variable contains: $text"

checkAnswer "$text" "delta echo foxtrot"

# What happened for the original version of the code? Why did it happen?


startExercise "1b" "Quoting"

# Now, let's try to pass this 'text' variable to an external command. We
# want there to be one argument which contains all three
# words. Specifically, we want the second one, in the following
# versions from other languages:
#
# - Python:
#     subprocess.run(["printArguments", "delta", "echo", "foxtrot"])
#     subprocess.run(["printArguments", "delta echo foxtrot"])
# - Node:
#     child_process.spawn("printArguments", ["delta", "echo", "foxtrot"])
#     child_process.spawn("printArguments", ["delta echo foxtrot"])

# Without quoting, Bash will split the expanded variables on
# spaces. With quoting, we can stop this happening.

printArguments $text # FIXME

# Bash lets us capture the results of a command using $(...), which we
# can assign to a variable.

captured=$(printArguments $text) # FIXME

checkAnswer "$captured" "1: delta echo foxtrot"


startExercise "1c" "Single quotes"

# Bash will look inside strings with "..." and interpret any $s or \
# escapes. Quoting variables is just a special case of this, and one
# can create arbitrary strings with substitutions. For instance, the
# next line creates a long string, as just one argument (and, with "
# instead of \"):

printArguments "a \"string\" that $text contains some substitutions $captured"

# Bash won't introspect strings with ' at all, including for $
# substitutions, and even \ escapes

noSubstitution="a string that $text contains some substitutions $captured" # FIXME

printArguments "$noSubstitution"

checkAnswer "$noSubstitution" 'a string that $text contains some substitutions $captured'


startExercise "1d" "Curly braces"

# Variable substitutions can be surrounded with {}, e.g.:

echo "with {}:" ${text}
echo "with {}: ${text}"

# This serves two purposes:
#
# - for array and other advanced substitutions (https://www.tldp.org/LDP/abs/html/parameter-substitution.html)
# - to specify the exact variable name to be substituted in complicated text

# In this code we want to end up with "FOO<value of variable
# 'text'>BAR", but Bash is currently interpreting it as "FOO<value of
# variable 'textBAR'>".
variable="FOO$textBAR" # FIXME

checkAnswer "$variable" "FOOdelta echo foxtrotBAR"


startExercise "1e" "Arrays"

# In addition to strings, Bash also has arrays. They can be declared
# with (element element ...).

array=(foo "delta echo foxtrot" bar)

# There are a few things one can do with them, all using the advanced
# ${...} substitutions. In the style of `${name[...]}`.

# Individual elements can be retrieved by indexing
secondElement="${array}" # FIXME

echo "the second element is"
printArguments "${secondElement}"

checkAnswer "$secondElement" "delta echo foxtrot"

# The full array can be expanded using @ or * as index. Without
# quoting, they do the same thing

echo "array with *"
printArguments ${array[*]} # FIXME
echo "array with @"
printArguments ${array[@]} # FIXME

# What do they do with quoting? (Edit above to see.)
# Both expansions are deliberately left unquoted: adding the quotes is
# the exercise. countStar expands the array with *, countAt with @, so
# that once quoted they give different argument counts.
countStar=$(countArguments ${array[*]}) # FIXME
countAt=$(countArguments ${array[@]}) # FIXME


checkAnswer "$countStar" "1 arguments"
checkAnswer "$countAt" "3 arguments"


# "${array[@]}" is the most useful way to manipulate arrays: it keeps
# each element together.

# Finally, one can also count the number of elements in an array with #

count="${#array[@]}"

checkAnswer "$count" "..." # FIXME (put in the right count)


# The command line arguments to a script (and the arguments to a
# function) are represented as an implicit array, which is indexed like
# `$0` (the name of the script/how it was invoked) `$1` `$2`, .... It
# can also be interpreted as an array with "$@" and "$*", and the @
# and * mean the same thing as with arrays.

# Let's write a function that prints its second argument, and then how
# many arguments:
myFunction() {
  # Prints the second argument on its own line, then the total number of
  # arguments in the form produced by countArguments.
  printf '%s\n' "$2"
  # "$@" forwards every argument unchanged (one word per argument, even
  # when an argument contains spaces), so countArguments sees them all.
  countArguments "$@"
}

myFunction 1 "2 3" "4 5"

checkAnswer "$(myFunction foo 'bar baz' qux)" "$(printf 'bar baz\n3 arguments\n')"


startExercise "2a" "Errors and booleans"


# Bash interprets the return code of a process as two related things:
#
# - Errors: 0 means no error, anything else is an error
# - Boolean: 0 means true, anything else is false
#
# An `if` statement works like:
#
#     if <program>; then <command>...; else <command>...; fi
#
# If <program> is true (returns 0), it executes the then branch,
# otherwise the else branch.

# The [ program (yes, it's a program, see `man [`) is one way to do
# checks. It returns 0 when the expression it is given is true.

variable=20

if [ ]; then # FIXME (see `man [`)
  echo "variable was larger than 10"
  larger=true
else
  echo "variable was less than 10"
  larger=false
fi

checkAnswer "$larger" "true"

## Control

# Bash is awkward to use; how can we have the computer help?

# The `set` builtin lets us make bash more strict and more helpful:
#
# help set
# ...
#         -e  Exit immediately if a command exits with a non-zero status.
#         -o option-name
#                 ...
#                 pipefail     the return value of a pipeline is the status of
#                              the last command to exit with a non-zero status,
#                              or zero if no command exited with a non-zero status
#         -u  Treat unset variables as an error when substituting.
#         ...
#         -x  Print commands and their arguments as they are executed.
#
# Almost all shell scripts should start with `set -euo pipefail`
# (equivalent to `set -e -u -o pipefail`).
#
# This tutorial won't talk about '-o pipefail', but it is important.


startExercise "3a" "set -e"

# The -e option means that we never forget to handle an error.

# This function always has exit-status 56 (which is not 0 so is an error)
functionThatFails() {
  # Always fails: 56 is simply a non-zero (and therefore error) status.
  local -r failureStatus=56
  return "$failureStatus"
}

# If we run the command as is, we keep going. And, using the special
# `$?` variable, we can see what the exit status of the previous
# command was
echo "before the failure"
functionThatFails
echo "after the failure (exit: $?)"

# If something fails and we keep going without realising, we could be
# doing operations in the wrong place, or on the wrong data. set -e
# avoids this.
captured=$(
  # set ... # FIXME
  echo "before the failure"
  functionThatFails
  echo "after the failure (exit: $?)"
)

checkAnswer "$captured" "before the failure"

# The 'captured' command above runs in a "subshell", which means that
# the 'set -e' exit only exits from that subshell (so it only skips
# the second 'echo'). What happens if we do it in the main part of our
# script?

set -e
echo "before the failure"
# functionThatFails  # FIXME
echo "after the failure (exit: $?)"

# If the return code is checked, such as by `if`, the program won't
# exit, even with `set -e`.

if : ; then # FIXME
  itFailed=false
else
  itFailed=true
fi

checkAnswer "$itFailed" "true"

startExercise "3b" "set -u"

# Bash is very ... relaxed about variables. If the variable doesn't
# exist, by default it will just expand to nothing.

echo "before the variable that doesn't exist"
printArguments "$doesNotExist"
echo "after the variable"

# set -u will make this an error, and so catch our typos
captured=$(
  # set ... # FIXME
  echo "before the variable that doesn't exist"
  printArguments "$doesNotExist"
  echo "after the variable"
)
checkAnswer "$captured" "before the variable"

# As above, we did this in a subshell; what happens if we do it at the
# top level?
set -u
echo "before the variable that doesn't exist"
# printArguments "$doesNotExist" # FIXME
echo "after the variable"

startExercise "3c" "set -x"

# A useful command for understanding what is happening is `set -x`,
# which makes bash print out every command that it runs. Let's see what happens

# set ... # FIXME
printArguments "${array[@]}"


## Shellcheck

# https://www.shellcheck.net is an automatic way to flag many problems
# in shell scripts. Paste this file into it and see what it says.
	#!/bin/bash
	# (The directive above is called a "shebang": https://en.wikipedia.org/wiki/Shebang_(Unix) )

	# Helper function that prints each of the arguments (functions can be
	# declared with `function <name> { ... }` or `<name>() { ... }`)
	printArguments() {
	  # Prints every argument on its own line as "<position>: <argument>",
	  # numbering from 1. Prints nothing when called without arguments.
	  #
	  # arguments come in as the variables $1, $2, $3, ...
	  #
	  # `local` keeps the counter private to this function; without it the
	  # assignment would overwrite any script-level variable named `count`.
	  local count=1
	  # (( ... )) does numeric tasks: (( $# )) is true while arguments remain
	  while (( $# )); do
	    echo "$count: $1"
	    # shift removes $1, and moves $2 to $1, $3 to $2, etc.
	    shift

	    (( count++ ))
	  done
	}

	# Helper function that prints the number of arguments
	countArguments() {
	  # Report how many arguments were received, in the form "<N> arguments".
	  printf '%d arguments\n' "$#"
	}

	# Helper functions that just manage the exercises
	quitOnIncorrect=true
	currentExercise=
	startExercise() {
	  # Records the exercise id ($1) in the global currentExercise, which
	  # checkAnswer reads when reporting, and prints a banner with the id
	  # and the exercise title ($2).
	  currentExercise=$1
	  printf '## Exercise %s - %s ##\n' "$1" "$2"
	}
	checkAnswer() {
	  # Compares the found value ($1) with the expected value ($2) as strings
	  # and reports the outcome for the exercise named in $currentExercise.
	  # On a mismatch, the command stored in $quitOnIncorrect is run; if it
	  # succeeds, the script exits with status 1.
	  if [[ "$1" != "$2" ]]; then
	    printf "incorrect answer for '%s':\n found: %s\n expected: %s\n" \
	      "$currentExercise" "$1" "$2"
	    if "$quitOnIncorrect"; then
	      exit 1
	    fi
	    return 0
	  fi

	  printf "correct answer for '%s'!\n" "$currentExercise"
	}





	# Bash is a fancy way to invoke programs.

	## "Programming"

	# It supports things you might recognise as "functions" and
	# "variables" and even "arrays", but the behaviour of them can be
	# weeeeeeird.

	# The most useful tools for bashing are:
	#
	# - `help something` for builtins
	# - `man something` for external programs
	# - https://www.tldp.org/LDP/abs/html/parameter-substitution.html





	### Variables and expansion

	# Bash variables are defined with <name>=<value>, with no spaces before or after the =:
	some_variable=the_value

	# They are used with $:

	echo "passing some_variable to the functions"
	printArguments $some_variable
	countArguments $some_variable

	# They are strings, whether or not they are declared with " or '



	startExercise "1a" "Declaring variables"

	# What if we want a variable with some text? The variable needs to be
	# a string with the three words delta, echo and foxtrot in it. Here's
	# a start:
	text=delta echo foxtrot # FIXME

	echo "The 'text' variable contains: $text"

	checkAnswer "$text" "delta echo foxtrot"

	# What happened for the original version of the code? Why did it happen?




	startExercise "1b" "Quoting"

	# Now, let's try to pass this 'text' variable to an external command. We
	# want there to be one argument which contains all three
	# words. Specifically, we want the second one, in the following
	# versions from other languages:
	#
	# - Python:
	# subprocess.run(["printArguments", "delta", "echo", "foxtrot"])
	# subprocess.run(["printArguments", "delta echo foxtrot"])
	# - Node:
	# child_process.spawn("printArguments", ["delta", "echo", "foxtrot"])
	# child_process.spawn("printArguments", ["delta echo foxtrot"])

	# Without quoting, Bash will split the expanded variables on
	# spaces. With quoting, we can stop this happening.

	printArguments $text # FIXME

	# Bash lets us capture the results of a command using $(...), which we
	# can assign to a variable.

	captured=$(printArguments $text) # FIXME

	checkAnswer "$captured" "1: delta echo foxtrot"


	startExercise "1c" "Single quotes"

	# Bash will look inside strings with "..." and interpret any $s or \
	# escapes. Quoting variables is just a special case of this, and one
	# can create arbitrary strings with substitutions. For instance, the
	# next line creates a long string, as just one argument (and, with "
	# instead of \"):

	printArguments "a \"string\" that $text contains some substitutions $captured"

	# Bash won't introspect strings with ' at all, including for $
	# substitutions, and even \ escapes

	noSubstitution="a string that $text contains some substitutions $captured" # FIXME

	printArguments "$noSubstitution"

	checkAnswer "$noSubstitution" 'a string that $text contains some substitutions $captured'



	startExercise "1d" "Curly braces"

	# Variable substitutions can be surrounded with {}, e.g.:

	echo "with {}:" ${text}
	echo "with {}: ${text}"

	# This serves two purposes:
	#
	# - for array and other advanced substitutions (https://www.tldp.org/LDP/abs/html/parameter-substitution.html)
	# - to specify the exact variable name to be substituted in complicated text

	# In this code we want to end up with "FOO<value of variable
	# 'text'>BAR", but Bash is currently interpreting it as "FOO<value of
	# variable 'textBAR'>".
	variable="FOO$textBAR" # FIXME

	checkAnswer "$variable" "FOOdelta echo foxtrotBAR"


	startExercise "1e" "Arrays"

	# In addition to strings, Bash also has arrays. They can be declared
	# with (element element ...).

	array=(foo "delta echo foxtrot" bar)

	# There are a few things one can do with them, all using the advanced
	# ${...} substitutions. In the style of `${name[...]}`.

	# Individual elements can be retrieved by indexing
	secondElement="${array}" # FIXME

	echo "the second element is"
	printArguments "${secondElement}"

	checkAnswer "$secondElement" "delta echo foxtrot"

	# The full array can be expanded using @ or * as index. Without
	# quoting, they do the same thing

	echo "array with *"
	printArguments ${array[*]} # FIXME
	echo "array with @"
	printArguments ${array[@]} # FIXME

	# What do they do with quoting? (Edit above to see.)
	# Both expansions are deliberately left unquoted: adding the quotes is
	# the exercise. countStar expands the array with *, countAt with @, so
	# that once quoted they give different argument counts.
	countStar=$(countArguments ${array[*]}) # FIXME
	countAt=$(countArguments ${array[@]}) # FIXME


	checkAnswer "$countStar" "1 arguments"
	checkAnswer "$countAt" "3 arguments"


	# "${array[@]}" is the most useful way to manipulate arrays: it keeps
	# each element together.

	# Finally, one can also count the number of elements in an array with #

	count="${#array[@]}"

	checkAnswer "$count" "..." # FIXME (put in the right count)


	# The command line arguments to a script (and the arguments to a
	# function) are represented as an implicit array, which is indexed like
	# `$0` (the name of the script/how it was invoked) `$1` `$2`, .... It
	# can also be interpreted as an array with "$@" and "$*", and the @
	# and * mean the same thing as with arrays.

	# Let's write a function that prints its second argument, and then how
	# many arguments:
	myFunction() {
	  # Prints the second argument on its own line, then the total number of
	  # arguments in the form produced by countArguments.
	  printf '%s\n' "$2"
	  # "$@" forwards every argument unchanged (one word per argument, even
	  # when an argument contains spaces), so countArguments sees them all.
	  countArguments "$@"
	}

	myFunction 1 "2 3" "4 5"

	checkAnswer "$(myFunction foo 'bar baz' qux)" "$(printf 'bar baz\n3 arguments\n')"



	startExercise "2a" "Errors and booleans"


	# Bash interprets the return code of a process as two related things:
	#
	# - Errors: 0 means no error, anything else is an error
	# - Boolean: 0 means true, anything else is false
	#
	# An `if` statement works like:
	#
	# if <program>; then <command>...; else <command>...; fi
	#
	# If <program> is true (returns 0), it executes the then branch,
	# otherwise the else branch.

	# The [ program (yes, it's a program, see `man [`) is one way to do
	# checks. It returns 0 when the expression it is given is true.

	variable=20

	if [ ]; then # FIXME (see `man [`)
	echo "variable was larger than 10"
	larger=true
	else
	echo "variable was less than 10"
	larger=false
	fi

	checkAnswer "$larger" "true"

	## Control

	# Bash is awkward to use; how can we have the computer help?

	# The `set` builtin lets us make bash more strict and more helpful:
	#
	# help set
	# ...
	# -e Exit immediately if a command exits with a non-zero status.
	# -o option-name
	# ...
	# pipefail the return value of a pipeline is the status of
	# the last command to exit with a non-zero status,
	# or zero if no command exited with a non-zero status
	# -u Treat unset variables as an error when substituting.
	# ...
	# -x Print commands and their arguments as they are executed.
	#
	# Almost all shell scripts should start with `set -euo pipefail`
	# (equivalent to `set -e -u -o pipefail`).
	#
	# This tutorial won't talk about '-o pipefail', but it is important.


	startExercise "3a" "set -e"

	# The -e option means that we never forget to handle an error.

	# This function always has exit-status 56 (which is not 0 so is an error)
	functionThatFails() {
	  # Always fails: 56 is simply a non-zero (and therefore error) status.
	  local -r failureStatus=56
	  return "$failureStatus"
	}

	# If we run the command as is, we keep going. And, using the special
	# `$?` variable, we can see what the exit status of the previous
	# command was
	echo "before the failure"
	functionThatFails
	echo "after the failure (exit: $?)"

	# If something fails and we keep going without realising, we could be
	# doing operations in the wrong place, or on the wrong data. set -e
	# avoids this.
	captured=$(
	# set ... # FIXME
	echo "before the failure"
	functionThatFails
	echo "after the failure (exit: $?)"
	)

	checkAnswer "$captured" "before the failure"

	# The 'captured' command above runs in a "subshell", which means that
	# the 'set -e' exit only exits from that subshell (so it only skips
	# the second 'echo'). What happens if we do it in the main part of our
	# script?

	set -e
	echo "before the failure"
	# functionThatFails # FIXME
	echo "after the failure (exit: $?)"

	# If the return code is checked, such as by `if`, the program won't
	# exit, even with `set -e`.

	if : ; then # FIXME
	itFailed=false
	else
	itFailed=true
	fi

	checkAnswer "$itFailed" "true"

	startExercise "3b" "set -u"

	# Bash is very ... relaxed about variables. If the variable doesn't
	# exist, by default it will just expand to nothing.

	echo "before the variable that doesn't exist"
	printArguments "$doesNotExist"
	echo "after the variable"

	# set -u will make this an error, and so catch our typos
	captured=$(
	# set ... # FIXME
	echo "before the variable that doesn't exist"
	printArguments "$doesNotExist"
	echo "after the variable"
	)
	checkAnswer "$captured" "before the variable"

	# As above, we did this in a subshell; what happens if we do it at the
	# top level?
	set -u
	echo "before the variable that doesn't exist"
	# printArguments "$doesNotExist" # FIXME
	echo "after the variable"

	startExercise "3c" "set -x"

	# A useful command for understanding what is happening is `set -x`,
	# which makes bash print out every command that it runs. Let's see what happens

	# set ... # FIXME
	printArguments "${array[@]}"



	## Shellcheck

	# https://www.shellcheck.net is an automatic way to flag many problems
	# in shell scripts. Paste this file into it and see what it says.