Skip to content

Instantly share code, notes, and snippets.

@abiyani
Last active August 29, 2015 13:58
Show Gist options
  • Save abiyani/10319398 to your computer and use it in GitHub Desktop.
Save abiyani/10319398 to your computer and use it in GitHub Desktop.
Removes duplicate paths from the input (which is assumed to be a "PATH-like" string)
# Removes redundant entries from the input (which is assumed to be a "PATH"-like string)
# Input: Exactly one argument ($1): a PATH-like string, which will be de-duplicated
# Output: Prints the deduplicated string on stdout (can be captured using command substitution by caller)
#
# Notes:
# - Assumes colon (:) as the delimiter character.
# - The only assumption regarding actual pathnames within the input string is that they won't
# contain the delimiter character (':') - this shouldn't be a problem, because bash will
# anyway split on ":" unconditionally (http://stackoverflow.com/a/14661492/1857518). Any other
# valid pathname character (whitespaces, non-printable ascii values, etc) are ok (modulo some other
# limitations, see below for detail)
# - Respects legacy usage of "::" to denote current directory in PATH variable
# (http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html).
# - Does *NOT* deduplicate symlinks (only exact matches are deduplicated)
# - "/a/b/" is different from "/a/b" (only exact matches are deduplicated)
# - If first or last character was a ":" in the original string, then it is kept intact.
# This special case is introduced to handle the weird behavior of MANPATH variable
# (https://github.com/fish-shell/fish-shell/issues/935)
# Examples: (NOTE: Quotes around the argument to the function are very important, do *NOT* omit them)
#
# PATH="$(dedup_path "$PATH")"
# PYTHONPATH="$(dedup_path "$PYTHONPATH")"
# MANPATH="$(dedup_path "$MANPATH")"
# ... etc
#
# If you want to be totally virtuous, and preserve the (potential) newline at the end of last path,
# you may use the following pattern (ref: http://www.etalabs.net/sh_tricks.html):
# PATH="$(dedup_path "$PATH"; printf X)"; PATH="${PATH%?}"
# # And you will also be qualified for this: https://xkcd.com/356/
# Prints "$1" (a colon-delimited, PATH-like string) on stdout with duplicate
# entries removed; the first occurrence of every entry is the one that is kept.
#
# Arguments:
#   $1 - the PATH-like string (treated as empty when omitted)
# Outputs:
#   The de-duplicated string on stdout, without a trailing newline.
#
# Works when the caller has `set -u` (nounset) enabled.
function dedup_path() {
  # Run in a subshell, so that the IFS / noglob changes are not visible outside
  (
    set -f  # No pathname expansion when ${input} is split unquoted below
    IFS=:   # Split on ":" only
    local input="${1-}"
    local p
    local deduped_path=""
    # Becomes 1 once two consecutive colons have been written to the de-duplicated string
    local two_colons_seen=0
    # Pick the "already seen" bookkeeping once instead of re-testing the version per entry
    local use_assoc=0
    # Fallback store for bash < 4: every added path is pushed here as "path:"
    local all_paths_added=":"
    if [[ "${BASH_VERSINFO[0]}" -ge 4 ]]; then
      use_assoc=1
      declare -A seen # Associative array (faster, but needs bash version >= 4.0)
    fi

    # Intentionally unquoted: with IFS=":" and globbing off this splits on ":" only
    for p in ${input}; do
      if [[ -z "${p}" ]]; then
        if [[ "${deduped_path:(-1)}" != ":" ]]; then
          # Nothing written yet, so this is a leading empty entry: keep its ":"
          deduped_path="${deduped_path}:"
        else
          # Output already ends in ":", one more would form "::"; write that only once
          if [[ ${two_colons_seen} -eq 0 ]]; then
            deduped_path="${deduped_path}:"
          fi
          two_colons_seen=1
        fi
        continue
      fi
      if [[ ${use_assoc} -eq 1 ]]; then
        # ":-" keeps the lookup of a not-yet-seen key from aborting under `set -u`
        if [[ -z "${seen["${p}"]:-}" ]]; then
          seen["${p}"]=1
          deduped_path="${deduped_path}${p}:"
        fi
      elif [[ "${all_paths_added}" != *":${p}:"* ]]; then
        all_paths_added="${all_paths_added}${p}:"
        deduped_path="${deduped_path}${p}:"
      fi
    done

    # Every entry was appended as "entry:". Drop that final ":" unless it is part
    # of a "::" or the input itself ended with ":".
    if [[ "${deduped_path:(-1)}" == ":" && "${deduped_path:(-2)}" != "::" && "${input:(-1)}" != ":" ]]; then
      deduped_path="${deduped_path%?}"
    fi
    printf %s "${deduped_path}"
  )
}
#####################################################################################################################################
#####################################################################################################################################
#########
# TESTS #
#########
# Announce the start of the self-test run (blank line before, blank line after)
printf '\n#### Will run all the tests ####\n\n'
# Asserts that calling function dedup on "$1" returns back "$2"
# Asserts that calling dedup_path on "$1" prints exactly "$2".
#
# Arguments:
#   $1 - input handed to dedup_path
#   $2 - expected output
# Outputs:
#   "Passed" on a match, otherwise a failure report showing the input, the
#   expected value and the received value (all on stdout).
function assert() {
  # Bash strips all trailing newlines from command substitution output, so an
  # extra character "X" is appended inside $() and removed right afterwards.
  # See http://www.etalabs.net/sh_tricks.html (§"Getting non-clobbered output from command substitution").
  local out
  out="$(dedup_path "${1}"; printf X)"
  out="${out%?}"
  if [[ "${out}" != "${2}" ]]; then
    # The values go through %s so that backslash sequences inside them are
    # reported verbatim instead of being interpreted as escapes.
    printf "\n@@@@@ Failed @@@@@ for\n'%s'\nExpected = '%s'\nReceived='%s'\n" "${1}" "${2}" "${out}"
  else
    printf 'Passed\n'
  fi
}
# Snapshot IFS and SHELLOPTS before any test runs; they are compared again after
# the last test case to confirm that dedup_path did not change them in this shell.
ORIGIFS="$IFS"
ORIGSHELLOPTS="$SHELLOPTS"
# --- Empty input, and entries made up of whitespace and/or colons ---
# NOTE(review): as written here, the two `" "` cases below are identical, and the
# `" :: ::"` case repeats the same " " entry, so its expected value looks
# inconsistent with exact-match de-duplication (compare the "${t}::${t}:::${t}${t}"
# case further down). The whitespace inside these literals may have been collapsed
# at some point - confirm against the original test list.
assert "" ""
assert ":" ":"
assert " " " "
assert " " " "
assert " :" " :"
assert " : " " : "
assert " :: ::" " :: :"
# Build literal newline / tab characters; the appended X protects the newline
# from being stripped by the command substitution and is removed right after.
n="$(printf "\n";printf X)";n="${n%?}" # newline char
t="$(printf "\t";printf X)";t="${t%?}" # \t char
# --- Entries that consist of, or contain, newline and tab characters ---
assert "${n}" "${n}"
assert "${n}:" "${n}:"
assert "${t}" "${t}"
assert "${t}:" "${t}:"
assert "${t}:${t}" "${t}"
assert "${n}:::${n}::::${n}:" "${n}::"
assert "${n}:${n}:" "${n}:"
assert "${n}${n}" "${n}${n}"
assert "${n}${n}:${n}${n}" "${n}${n}"
assert "${n}${n}:${n}${n}:" "${n}${n}:"
assert "${n}${n}:${n}${n}::" "${n}${n}::"
assert "${n}${n}:${n}:::${t}::" "${n}${n}:${n}::${t}:"
assert "${t}::${t}:::${t}${t}" "${t}::${t}${t}"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}" "/ab${n}::/ab${t}:/a${n}/b:${n}"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}::" "/ab${n}::/ab${t}:/a${n}/b:${n}:"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}:" "/ab${n}::/ab${t}:/a${n}/b:${n}:"
# --- Ordinary paths: leading/trailing ":", runs of ":", trailing "/" kept distinct ---
assert " /" " /"
assert ":" ":"
assert "::" "::"
assert ":::" "::"
assert "/a/:::" "/a/::"
assert "::/a/:::" "::/a/:"
assert ":/a/b:/a/:" ":/a/b:/a/:"
assert ":/a/b:/a/b:" ":/a/b:"
assert ":/a/b:/a/b" ":/a/b"
assert ":/a/b:/a/b/:" ":/a/b:/a/b/:"
assert "/a b c/d e:/a b c/d e::" "/a b c/d e::"
assert "/a/b:/a/b:/c/d::/a/b::" "/a/b:/c/d::"
assert "/a/b:/a/b:/c/d::/a/b: :" "/a/b:/c/d:: :"
assert "/a/b:/a/b:/c/d::/a/b: " "/a/b:/c/d:: "
assert "::a::a::b::c::d" "::a:b:c:d"
assert "::a::a::b::c::d::" "::a:b:c:d:"
assert "/usr/bin:/usr/local/bin:/:/usr:/usr/local::/usr/bin:/usr/sbin" "/usr/bin:/usr/local/bin:/:/usr:/usr/local::/usr/sbin"
assert ".:..:.::./" ".:..::./"
assert "/a/b/:/a/b:/a/:/a::" "/a/b/:/a/b:/a/:/a::"
assert "//:///:////::::/:://///:" "//:///:////::/://///:"
# --- Shell metacharacters: the expected values show every entry passed through
# literally (no parameter expansion, globbing, command substitution or quote
# removal applied to the entries) ---
assert '$PATH:$PATH' '$PATH' # The single quotes are on purpose here
assert "/*:/" "/*:/"
assert "*::**:*" "*::**"
assert "/?://://" "/?://"
assert "/a:/a:/a" "/a"
assert "/a:/a::/a:/a" "/a::" # This case is sort of a special case, as the de-duplicated path will have a trailing ":" (when original didn't), it might change semantic for MANPATH, but it's still the right thing to do
assert '/a\$:/a$' '/a\$:/a$'
assert '/a\$:/a$:' '/a\$:/a$:'
assert '/a`ls`::' '/a`ls`::'
assert '`ls`' '`ls`'
assert '":"":":""' '":""'
assert "':'':':''" "':''"
assert '::${BASH_VERSINFO[@]}:${BASH_VERSINFO[@]}::' '::${BASH_VERSINFO[@]}:'
assert '.*:/a' '.*:/a'
assert '!:/a' '!:/a'
assert 'ls|egrep blah:ls|egrep blah' 'ls|egrep blah'
assert '$(ls):$(ls)' '$(ls)'
assert '\\:\:\\:\\\\' '\\:\:\\\\'
# An entry that looks like an option to echo/printf
assert '-ne' '-ne'
assert '*:??::**:*' '*:??::**'
# The test run must leave this shell's field separator exactly as it was
if [[ "$IFS" == "$ORIGIFS" ]]; then
  echo "Passed - IFS is unchanged"
else
  echo -e "\n@@@@@ IFS got changed @@@@@\noriginal = '$ORIGIFS'\nnow = '$IFS'"
fi
# ... and likewise the set of enabled shell options
if [[ "$SHELLOPTS" == "$ORIGSHELLOPTS" ]]; then
  echo "Passed - SHELLOPTS is unchanged"
else
  echo -e "\n@@@@@ SHELLOPTS got changed @@@@@\noriginal = '$ORIGSHELLOPTS'\nnow = '$SHELLOPTS'"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment