Skip to content

Instantly share code, notes, and snippets.

@verdimrc
Last active May 13, 2025 08:50
Show Gist options
  • Save verdimrc/a10dd3ea00a34b0ffb3e8ee8d5cde8b5 to your computer and use it in GitHub Desktop.
Save verdimrc/a10dd3ea00a34b0ffb3e8ee8d5cde8b5 to your computer and use it in GitHub Desktop.
Various bash commands
# Turn every carriage return (^M) in a tqdm log file littered with them into
# a newline, so each progress update lands on its own line.
tr '\r' '\n' < inputfile > outputfile
# AWS CLI to handle utf-8 in S3 files.
# https://github.com/aws/aws-cli/issues/3902#issuecomment-513842630
# The variable is exported so that the aws process started below inherits it.
export PYTHONIOENCODING=utf-8
# "s3://..." is a placeholder; substitute the real bucket/prefix.
aws s3 ls s3://...
# Exclude chars to generate a random string that's compatible with expect.
# After shell unescaping, the excluded set is: ' " ` \ [ ] { } ( ) * #
EXCLUDED_CHAR="'\"\`\\[]{}()*#"
# apg options (see apg(1)): -a 1 random-character mode, -M SNCL symbol/numeral/
# capital/lowercase classes, -m 10 -x 10 min and max length, -n 1 a single
# string, -E the characters to leave out.
RANDOM_STR=$(apg -a 1 -M SNCL -m 10 -x 10 -n 1 -E "${EXCLUDED_CHAR}")
# Colored watch + jq: jq -C forces colored output even though it writes to a
# pipe, watch --color renders those color sequences, and -n10 re-runs the
# quoted pipeline every 10 seconds.
watch --color -n10 'pcluster describe-compute-fleet --cluster-name my-cluster | jq -C .'
# Don't let jq crash on invalid JSON: read each input line as a raw string
# (-R), try to parse it, and fall back to the untouched line if parsing fails.
jq -R '. as $line | try (fromjson) catch $line' <<< "INVALID JSON"
# Make xargs handle filenames with spaces (or any other odd character) by
# passing NUL-delimited names: find -print0 | xargs -0. This form works with
# GNU as well as BSD (OSX) find/xargs, so gxargs is not needed.
# -prune keeps find from descending into a directory that is about to be
# deleted; rm needs -r because every match is a directory, and -f makes an
# empty match list a silent no-op.
find . -name .ipynb_checkpoints -type d -prune -print0 | xargs -0 rm -rf --
# Colorize matching lines, but still display all content: the trailing '|^'
# alternative matches the start of every line, so no line is filtered out.
# May need --color=always to enforce colorization.
# grep -E is used instead of the obsolescent egrep, and the dots are escaped
# so that they match a literal '.' only.
ls -al transformers-scripts/ | grep -E --color=always 'run_ner\.py|transformers-train\.py|^'
ls -al transformers-scripts/ | grep -E 'run_ner\.py|transformers-train\.py|^'
# Get script name; handle both cases of ./script.sh or "source ./script.sh".
# BASH_SOURCE[0] names the file this line lives in, while $0 names what was
# invoked; the two are equal only when the file is executed directly.
# An explicit if/else is used so the "sourced" branch can never run as a
# fallback for a failing echo.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  echo "Script is being run"
else
  echo "Script is being sourced"
fi
echo "\$BASH_SOURCE ${BASH_SOURCE[0]}"
# Get script's directory.
# Only BASH_SOURCE[0] (the current file) is resolved; expanding the whole
# array would hand every file on the source stack to realpath. Every
# expansion is quoted so that paths containing spaces survive intact.
# CAUTION: when BASH_SOURCE is empty (e.g. the line is pasted at an
# interactive prompt) realpath fails and dirname falls back to ".".
BIN_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# Is the script run as root? An effective UID of 0 means root.
if [[ $EUID -eq 0 ]]; then
  echo 'Script is run as root'
else
  echo 'Script is NOT run as root'
fi
# Run 'docker ps' with the new group in effect - https://superuser.com/a/1572955
# NOTE: this fragment is meant to be used inside a script, hence it does not
# "spawn" an interactive subshell, although technically newgrp still creates
# a subshell.
sudo usermod -a -G docker ec2-user # Normally this needs a logout/login to take effect.
# The heredoc below is the stdin of the shell that newgrp starts, so the
# commands between the END markers run with the docker group active.
newgrp docker << END
docker ps
END
# Print the value of an environment variable. printenv only sees exported
# variables: for a "normal" (non-exported) shell variable it prints nothing.
printenv ENV_VAR_NAME
# Sample of regex matching: report an error when $string contains
# "err"/"Err" or "fail"/"Fail" anywhere.
if [[ $string =~ .*[Ee]rr.*|.*[Ff]ail.* ]]; then
  echo 'Error detected'
else
  echo 'OK'
fi
# Check if an array element is defined. For this example, ${PROFILE[test]} is
# defined because it has an empty-string value: ${var+_} expands to "_" for
# any set element (even an empty one) and to nothing for an unset one.
#
# NOTE: OSX must brew install bash v4 to use dictionary (associative) arrays.
declare -A PROFILE=( ['master']='marcverdcn' ['dev']='vmarchcn' ['test']='' )
# First positional argument selects the repo; defaults to "master".
REPO="${1:-master}"
if [[ -z "${PROFILE[$REPO]+_}" ]]; then
  # Quoted so that $REPO is printed verbatim (no word splitting or globbing).
  echo "No matching profile for repo $REPO" >&2
fi
# Variable indirection: ${!i} expands to the value of the variable whose
# *name* is stored in i. printf with quoted arguments prints that value
# verbatim, without word splitting or glob expansion.
for i in VAR_A VAR_B VAR_C; do
  printf '%s=%s\n' "$i" "${!i}"
done
# Create a variable holding multiple lines. The quoted 'EOF' delimiter keeps
# the heredoc body literal; command substitution drops the trailing newline.
LONG_STRING="$(cat <<'EOF'
abcd efgh
1234 5678
EOF
)"
# Create a file with multiple lines (quoted 'EOF': the body is written as-is).
cat > haha.txt <<'EOF'
abcd efgh
1234 5678
EOF
# Pass JSON to AWS CLI using the "jq -c ." trick: keep the JSON readable in a
# heredoc, then let jq compact it onto a single line for the CLI argument.
STOPPING_CONDITIONS=$(cat << EOF
{
"MaxInvocations": 7200,
"ModelLatencyThresholds": [
{
"Percentile": "P95",
"ValueInMilliseconds": 10000
}
]
}
EOF
)
# The JSON reaches jq through a quoted here-string, so the shell never
# word-splits or glob-expands it (an unquoted echo would collapse whitespace
# inside string values and expand characters such as '*').
# The trailing "..." stands for the remaining arguments of the real command.
aws sagemaker create-inference-recommendations-job --stopping-conditions "$(jq -c . <<< "${STOPPING_CONDITIONS}")" ...
# Parallel copy.
# Usage: copy.sh <srcdir> <destdir>
# NOTE: <destdir> is resolved after cd'ing into <srcdir>, so pass an absolute
# path unless the destination really is meant to be relative to <srcdir>.
# Refuse to run without both arguments: an empty $1 would make cd go to $HOME.
: "${1:?Usage: copy.sh <srcdir> <destdir>}" "${2:?Usage: copy.sh <srcdir> <destdir>}"
cd -- "$1" || exit 1
mkdir -p -- "$2" || exit 1
# Pass 1: recreate the directory tree below <destdir>; up to 40 mkdir
# processes run at once, each receiving at most 100 names. Names travel
# NUL-delimited so spaces and newlines in them are harmless; -r skips the
# command entirely when there is nothing to do.
find . -type d -print0 | (cd -- "$2" && xargs -0 -r -n100 -P40 mkdir --parents)
# Pass 2: copy everything that is not a directory, preserving attributes (-a)
# and the relative path of each file (--parents).
find . \! -type d -print0 | xargs -0 -r -n100 -P40 cp -t "$2" -a --parents
# Detect sourcing by comparing $_ with $0. This has to be the first command
# of the script, because every later command overwrites $_.
# "$0" is quoted so it is compared as a literal string; unquoted, the right
# side of != inside [[ ]] is interpreted as a glob pattern.
if [[ $_ != "$0" ]]; then
  echo "Script is being sourced"
else
  echo "Script is being run"
fi
# Linux-only way to find the script's directory (relies on readlink -f). See
# get_bin_dir() for the ambidextrous version (Linux & OSX).
# Step 1: canonical path of this file; step 2: keep only its directory part.
BIN_DIR=$(readlink -f "${BASH_SOURCE[0]}")
BIN_DIR=$(dirname "${BIN_DIR}")
#######################################
# Utility function to get the directory of the file that defines it (deals
# with Mac OSX quirkiness). Ambidextrous: works on both Linux and OSX.
# Globals:   BASH_SOURCE (read)
# Arguments: none
# Outputs:   the canonical directory on stdout, without a trailing newline;
#            an error message on stderr when greadlink is missing on OSX
# Returns:   0 on success, 1 when greadlink is missing on OSX, otherwise the
#            non-zero status of the failing readlink call
#######################################
get_bin_dir() {
  local readlink_cmd=readlink
  if [[ $(uname) == 'Darwin' ]]; then
    readlink_cmd=greadlink
    # command -v reports availability through its exit status. A test such as
    # `[ $(which greadlink) == '' ]` collapses to `[ == '' ]` when nothing is
    # found, which is an error rather than a match, so it never fires.
    if ! command -v greadlink > /dev/null 2>&1; then
      # greadlink is shipped by the Homebrew "coreutils" formula.
      echo '[ERROR] Mac OSX requires greadlink. Install with "brew install coreutils"' >&2
      # return (not exit): the function must not kill the shell that calls it.
      return 1
    fi
  fi
  # Declared separately from the assignment so a readlink failure is not
  # masked by the exit status of `local`.
  local resolved
  resolved=$("$readlink_cmd" -f "${BASH_SOURCE[0]}") || return
  # printf with a quoted argument keeps spaces in the path intact.
  printf '%s' "$(dirname "$resolved")"
}
##########################################################################
# Ambidextrous script (zsh, bash) to detect whether a script is sourced, #
# then get the script's BIN_DIR. #
##########################################################################
# https://stackoverflow.com/a/28776166
# Report whether the current file is being sourced.
# Returns 0 when sourced, 1 otherwise.
is_sourced() {
  if [ -z "$ZSH_VERSION" ]; then
    # Not zsh: treat the file as sourced when $0 is the shell binary itself.
    # Add additional POSIX-compatible shell names here, if needed.
    case ${0##*/} in
      bash|-bash) return 0 ;;
    esac
    return 1 # NOT sourced.
  fi

  # zsh: the evaluation context contains a ":file:" element.
  case $ZSH_EVAL_CONTEXT in
    *:file:*) return 0 ;;
  esac
  return 1 # NOT sourced.
}
# Abort unless this file is being sourced (is_sourced returns 0 when sourced).
# NOTE(review): `exit -1` surfaces to the caller as status 255 - confirm that
# this is the intended code.
is_sourced || { echo "You must source this script" ; exit -1 ; }
# Name of this file. Under zsh (ZSH_VERSION set) the %N prompt escape is
# expanded through the (%) parameter flag; otherwise BASH_SOURCE[0] is used.
SRC_FILENAME=$([ -n "$ZSH_VERSION" ] && echo "${(%):-%N}" || echo "${BASH_SOURCE[0]}")
#######################################
# Utility function to get the directory of a script (deals with Mac OSX
# quirkiness). Ambidextrous: works on both Linux and OSX.
# Arguments: $1 - path of the script whose directory is wanted
# Outputs:   the canonical directory on stdout, without a trailing newline;
#            an error message on stderr when greadlink is missing on OSX
# Returns:   0 on success, 1 when greadlink is missing on OSX, otherwise the
#            non-zero status of the failing readlink call
#######################################
get_bin_dir() {
  local readlink_cmd=readlink
  if [[ $(uname) == 'Darwin' ]]; then
    readlink_cmd=greadlink
    if ! command -v greadlink > /dev/null 2>&1; then
      # greadlink is shipped by the Homebrew "coreutils" formula.
      echo '[ERROR] Mac OSX requires greadlink. Install with "brew install coreutils"' >&2
      # Explicit status: a bare `return` would hand back the 0 of the echo
      # above and make the failure look like success.
      return 1
    fi
  fi
  # Declared separately from the assignment so a readlink failure is not
  # masked by the exit status of `local`.
  local resolved
  # "$1" is quoted so a path containing spaces stays one argument; "--" keeps
  # a path that starts with "-" from being parsed as an option.
  resolved=$("$readlink_cmd" -f -- "$1") || return
  # printf with a quoted argument keeps spaces in the path intact.
  printf '%s' "$(dirname "$resolved")"
}
# Directory of the sourced file, as printed by get_bin_dir for SRC_FILENAME.
BIN_DIR=$(get_bin_dir "$SRC_FILENAME")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment