# Gist abiyani/10319398 (last active August 29, 2015 13:58).
# Removes duplicate paths from the input (which is assumed to be a "PATH-like" string).
# Note: the hosting page warned that this file contains bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
# Removes redundant entries from the input (which is assumed to be a "PATH"-like string)
# Input: Exactly one argument ($1): a PATH-like string, which will be deduplicated
# Output: Prints the deduplicated string on stdout (can be captured using command substitution by caller)
#
# Notes:
# - Assumes colon (:) as the delimiter character.
# - The only assumption regarding actual pathnames within the input string is that they won't
#   contain the delimiter character (':') - this shouldn't be a problem, because bash will
#   anyway split on ":" unconditionally (http://stackoverflow.com/a/14661492/1857518). Any other
#   valid pathname characters (whitespace, non-printable ASCII values, etc.) are OK (modulo some other
#   limitations, see below for details)
# - Respects legacy usage of "::" to denote current directory in PATH variable
#   (http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html).
# - Does *NOT* deduplicate symlinks (only exact matches are deduplicated)
# - "/a/b/" is different from "/a/b" (only exact matches are deduplicated)
# - If the first or last character was a ":" in the original string, then it is kept intact.
#   This special case is introduced to handle the weird behavior of the MANPATH variable
#   (https://github.com/fish-shell/fish-shell/issues/935)
# Examples: (NOTE: Quotes around the argument to the function are very important, do *NOT* omit them)
#
#   PATH="$(dedup_path "$PATH")"
#   PYTHONPATH="$(dedup_path "$PYTHONPATH")"
#   MANPATH="$(dedup_path "$MANPATH")"
#   ... etc
#
# If you want to be totally virtuous, and preserve the (potential) newline at the end of the last path,
# you may use the following pattern (ref: http://www.etalabs.net/sh_tricks.html):
#   PATH="$(dedup_path "$PATH"; printf X)"; PATH="${PATH%?}"
#   # And you will also be qualified for this: https://xkcd.com/356/
dedup_path() {
  # Prints "$1" (a colon-delimited, PATH-like string) on stdout with repeated
  # entries removed; the first occurrence of each entry is kept, in order.
  #   - Entries are compared as exact strings; nothing is normalized.
  #   - Empty entries: a leading ":" and the first "::" are kept, every later
  #     empty entry is dropped.
  #   - A trailing ":" present in the input is kept in the output.
  # No trailing newline is printed.
  #
  # The body runs in a subshell so that the `set -f` and IFS changes below are
  # never visible to the caller.
  (
    set -f  # no pathname expansion: entries such as "*" must stay literal
    IFS=:   # the unquoted expansion of $1 below must split on ":" only

    local result=""            # de-duplicated string; every append ends in ":"
    local kept_double_colon=0  # 1 once a "::" has been written to ${result}
    local legacy_bash=0        # 1 when associative arrays are unavailable
    local seen_list=":"        # bash < 4 only: ":entry:entry:...:" of kept entries
    local entry is_new

    # Decide once which "already seen" bookkeeping to use.
    if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
      legacy_bash=1
    else
      declare -A seen  # associative array (faster, needs bash >= 4.0)
    fi

    # Intentionally unquoted: word splitting on IFS=":" yields the entries.
    for entry in ${1}; do
      if [[ -z "${entry}" ]]; then
        if [[ "${result:(-1)}" != ":" ]]; then
          # Only reachable while ${result} is still empty: a leading ":".
          result="${result}:"
        elif [[ "${kept_double_colon}" -eq 0 ]]; then
          # First empty entry that follows another entry: emit the one "::".
          result="${result}:"
          kept_double_colon=1
        fi
        continue
      fi

      is_new=0
      if [[ "${legacy_bash}" -eq 1 ]]; then
        # The quoted part of the pattern is matched literally, so glob
        # characters inside ${entry} are harmless here.
        if [[ "${seen_list}" != *":${entry}:"* ]]; then
          seen_list="${seen_list}${entry}:"
          is_new=1
        fi
      elif [[ -z "${seen["${entry}"]}" ]]; then
        seen["${entry}"]=1
        is_new=1
      fi

      if [[ "${is_new}" -eq 1 ]]; then
        result="${result}${entry}:"
      fi
    done

    # Drop the separator appended after the last entry, unless it is part of
    # a "::" or the input itself ended with ":".
    if [[ "${result:(-1)}" == ":" && "${result:(-2)}" != "::" && "${1:(-1)}" != ":" ]]; then
      result="${result%?}"
    fi

    printf '%s' "${result}"
  )
}
##################################################################################################################################### | |
##################################################################################################################################### | |
######### | |
# TESTS # | |
######### | |
# Announce the start of the self-test run (blank line before and after).
printf '\n#### Will run all the tests ####\n\n'
# Asserts that calling function dedup_path on "$1" returns back "$2"
assert() {
  # Checks that `dedup_path "$1"` prints exactly "$2".
  # Arguments: $1 - input PATH-like string, $2 - expected output.
  # Outputs:   "Passed" on stdout, or a failure report on stdout showing the
  #            input, the expected value and the received value verbatim.
  # Returns:   0 when the output matches, 1 otherwise.
  local actual

  # Bash strips all trailing newlines from command substitution output, so a
  # sentinel "X" is appended inside $( ) and removed right after
  # (http://www.etalabs.net/sh_tricks.html, "Getting non-clobbered output
  # from command substitution").
  actual="$(dedup_path "${1}"; printf X)"
  actual="${actual%?}"

  if [[ "${actual}" != "${2}" ]]; then
    # The values go through %s so that backslashes in the test data are shown
    # as-is rather than being interpreted as escape sequences.
    printf "\n@@@@@ Failed @@@@@ for\n'%s'\nExpected = '%s'\nReceived='%s'\n" \
      "${1}" "${2}" "${actual}"
    return 1
  fi

  printf 'Passed\n'
}
# Self-test driver: runs dedup_path through `assert` on a list of inputs, then
# verifies that the calls leaked neither an IFS nor a shell-option change.

# Snapshot the state that dedup_path changes inside its subshell.
ORIGIFS="$IFS"
ORIGSHELLOPTS="$SHELLOPTS"

# --- Empty input and whitespace-only entries ---
assert "" ""
assert ":" ":"
assert " " " "
# NOTE(review): the three cases below use a two-space entry so that they are
# distinct from the one-space entry; this spacing is a reconstruction - confirm
# against the original test list.
assert "  " "  "
assert " :" " :"
assert " :  " " :  "
assert " ::  ::" " ::  :"

# --- Entries made of newline / tab characters ---
n=$'\n' # newline char
t=$'\t' # \t char
assert "${n}" "${n}"
assert "${n}:" "${n}:"
assert "${t}" "${t}"
assert "${t}:" "${t}:"
assert "${t}:${t}" "${t}"
assert "${n}:::${n}::::${n}:" "${n}::"
assert "${n}:${n}:" "${n}:"
assert "${n}${n}" "${n}${n}"
assert "${n}${n}:${n}${n}" "${n}${n}"
assert "${n}${n}:${n}${n}:" "${n}${n}:"
assert "${n}${n}:${n}${n}::" "${n}${n}::"
assert "${n}${n}:${n}:::${t}::" "${n}${n}:${n}::${t}:"
assert "${t}::${t}:::${t}${t}" "${t}::${t}${t}"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:"
assert "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:" "/ab${n}/:/ab :/a${t}/b::/ab${n}${n}:"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}" "/ab${n}::/ab${t}:/a${n}/b:${n}"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}::" "/ab${n}::/ab${t}:/a${n}/b:${n}:"
assert "/ab${n}:::/ab${t}:/a${n}/b::/ab${n}:::${n}:" "/ab${n}::/ab${t}:/a${n}/b:${n}:"

# --- Leading / trailing / repeated colons ---
assert " /" " /"
assert ":" ":"
assert "::" "::"
assert ":::" "::"
assert "/a/:::" "/a/::"
assert "::/a/:::" "::/a/:"
assert ":/a/b:/a/:" ":/a/b:/a/:"
assert ":/a/b:/a/b:" ":/a/b:"
assert ":/a/b:/a/b" ":/a/b"
assert ":/a/b:/a/b/:" ":/a/b:/a/b/:"
assert "/a b c/d e:/a b c/d e::" "/a b c/d e::"
assert "/a/b:/a/b:/c/d::/a/b::" "/a/b:/c/d::"
assert "/a/b:/a/b:/c/d::/a/b: :" "/a/b:/c/d:: :"
assert "/a/b:/a/b:/c/d::/a/b: " "/a/b:/c/d:: "
assert "::a::a::b::c::d" "::a:b:c:d"
assert "::a::a::b::c::d::" "::a:b:c:d:"
assert "/usr/bin:/usr/local/bin:/:/usr:/usr/local::/usr/bin:/usr/sbin" "/usr/bin:/usr/local/bin:/:/usr:/usr/local::/usr/sbin"
assert ".:..:.::./" ".:..::./"
assert "/a/b/:/a/b:/a/:/a::" "/a/b/:/a/b:/a/:/a::"
assert "//:///:////::::/:://///:" "//:///:////::/://///:"

# --- Characters that are special to the shell must be treated literally ---
assert '$PATH:$PATH' '$PATH' # The single quotes are on purpose here
assert "/*:/" "/*:/"
assert "*::**:*" "*::**"
assert "/?://://" "/?://"
assert "/a:/a:/a" "/a"
# This case is sort of a special case, as the de-duplicated path will have a
# trailing ":" (when the original didn't); it might change semantics for
# MANPATH, but it's still the right thing to do.
assert "/a:/a::/a:/a" "/a::"
assert '/a\$:/a$' '/a\$:/a$'
assert '/a\$:/a$:' '/a\$:/a$:'
assert '/a`ls`::' '/a`ls`::'
assert '`ls`' '`ls`'
assert '":"":":""' '":""'
assert "':'':':''" "':''"
assert '::${BASH_VERSINFO[@]}:${BASH_VERSINFO[@]}::' '::${BASH_VERSINFO[@]}:'
assert '.*:/a' '.*:/a'
assert '!:/a' '!:/a'
assert 'ls|egrep blah:ls|egrep blah' 'ls|egrep blah'
assert '$(ls):$(ls)' '$(ls)'
assert '\\:\:\\:\\\\' '\\:\:\\\\'
assert '-ne' '-ne'
assert '*:??::**:*' '*:??::**'

# --- dedup_path must not leak its IFS / `set -f` changes to the caller ---
if [[ "$ORIGIFS" != "$IFS" ]]; then
  printf "\n@@@@@ IFS got changed @@@@@\noriginal = '%s'\nnow = '%s'\n" "$ORIGIFS" "$IFS"
else
  printf 'Passed - IFS is unchanged\n'
fi
if [[ "$ORIGSHELLOPTS" != "$SHELLOPTS" ]]; then
  printf "\n@@@@@ SHELLOPTS got changed @@@@@\noriginal = '%s'\nnow = '%s'\n" "$ORIGSHELLOPTS" "$SHELLOPTS"
else
  printf 'Passed - SHELLOPTS is unchanged\n'
fi
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment