Bash urlencode and urldecode
urlencode() {
  # urlencode <string>
  #
  # Percent-encode <string> and write the result to stdout (no trailing
  # newline). ASCII letters, digits and the characters . ~ _ - pass through
  # unchanged; every other byte is written as %XX with uppercase hex digits.
  #
  # The C locale is forced for the duration of the call so that the length,
  # the substring expansion and the bracket ranges below all work on single
  # bytes; a multi-byte UTF-8 character therefore becomes one %XX per byte.
  # It has to be set before ${#1} is evaluated.
  local LC_ALL=C
  local length="${#1}"
  local i c code
  for (( i = 0; i < length; i++ )); do
    c="${1:i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      *)
        # A leading quote makes printf yield the numeric value of the
        # character. Keep only the low 8 bits as a guard in case the C
        # library reports a high byte as a value above 0xFF.
        printf -v code '%d' "'$c"
        printf '%%%02X' "$(( code & 0xFF ))"
        ;;
    esac
  done
}
urldecode() {
  # urldecode <string>
  #
  # Decode a percent-encoded string and write it to stdout (no trailing
  # newline). Every "+" is turned into a space first, then every %XX is
  # replaced by the byte with that hexadecimal value.
  local url_encoded="${1//+/ }"
  local backslash='\' hex_prefix='\x'
  # Double any literal backslash so that printf %b prints it verbatim instead
  # of treating input such as "\t" or "\c" as an escape sequence.
  url_encoded=${url_encoded//"$backslash"/"$backslash$backslash"}
  # Rewrite %XX as \xXX and let printf %b produce the bytes.
  url_encoded=${url_encoded//%/"$hex_prefix"}
  printf '%b' "$url_encoded"
}
cjplay02 commented Jan 29, 2021

Good call @cdown. I had the urldecode call in a command substitution - urldecoded=$(urldecode 's3://...'). Once I removed the function call from the command substitution, the spaces were retained from the encoding. Now I just need to find a better way to declare the result as a variable...

Edit: Double-quoting the variable wherever it is used in downstream commands fixed my issue, i.e. echo "$varname".

dicktyr commented Jan 31, 2021

just a brief nod to mawk
which is five times faster in my tests
(indeed, often faster than sed)

I know it's not a de facto standard like bash
i.e. installed by default on so many systems
but it should be and it is on my systems

I also notice that bash seems to be catching up with ksh93

GeekDuanLian commented Feb 9, 2021

One line implementation, suitable for storing in .bashrc

# urle <string>: percent-encode every byte outside [a-zA-Z0-9.~_-]; prints the result followed by a newline, returns 1 on empty input.
# LC_ALL=C (rather than LANG, which LC_ALL and LC_CTYPE override) makes the loop walk bytes, so multi-byte UTF-8 encodes as one %XX per byte.
urle () { [[ "${1}" ]] || return 1; local LC_ALL=C i x n; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; if [[ "${x}" == [a-zA-Z0-9.~_-] ]]; then printf '%s' "${x}"; else printf -v n '%d' "'${x}"; printf '%%%02X' "$(( n & 0xFF ))"; fi; done; echo; }
# urld <string>: decode "+" to space and %XX to bytes; prints the result followed by a newline, returns 1 on empty input.
# Literal backslashes are doubled first so printf %b emits them verbatim; printf (unlike echo -e) also cannot mistake input such as "-n" for an option.
urld () { [[ "${1}" ]] || return 1; local s="${1//+/ }" b='\' h='\x'; s=${s//"$b"/"$b$b"}; s=${s//%/"$h"}; printf '%b\n' "$s"; }

rojenzaman commented Jun 13, 2021

Thanks for it!

SilviaIenciu commented Jan 7, 2022

Thanks for this.
Could you please also license this code of yours?

ThePredators commented Feb 6, 2022

Thanks for the script, but I don't know why the encoded output I get from urlencode has a stray % character at the end!

ironbishop commented Apr 4, 2022

I had to add a check for systems where LC_COLLATE is not set:

if [ -n "$old_lc_collate" ] ; then LC_COLLATE=$old_lc_collate ; fi

milahu commented Sep 6, 2022

LC_ALL=C is needed to support Unicode: it makes the loop iterate over bytes, not characters.
LC_COLLATE=C or LANG=C do not work.
LC_ALL=C must also be set before ${#1} is evaluated, so that the length of $1 is measured in bytes.

#!/usr/bin/env bash
# MIT License

# encode special characters per RFC 3986
# urlencode <string>
# Write <string> to stdout with every byte outside [-_.~A-Za-z0-9] replaced
# by %XX (uppercase hex). No trailing newline is printed.
urlencode() {
    local LC_ALL=C # support unicode = loop bytes, not characters
    local c i n=${#1} # n is a byte count because LC_ALL=C is already in effect
    local code
    for (( i=0; i<n; i++ )); do
        c=${1:i:1}
        case "$c" in
            [-_.~A-Za-z0-9]) # also encode ;,/?:@&=+$!*'()# == encodeURIComponent in javascript
            #[-_.~A-Za-z0-9\;,/?:@\&=+\$!*\'\(\)#]) # dont encode ;,/?:@&=+$!*'()# == encodeURI in javascript
               printf '%s' "$c" ;;
            *)
               # The leading quote asks printf for the character's numeric
               # value; mask to 8 bits as a guard in case the C library
               # reports a high byte as a value above 0xFF.
               printf -v code '%d' "'$c"
               printf '%%%02X' "$(( code & 0xFF ))" ;;
        esac
    done
}

# Self-test for urlencode: encodes a fixed sample containing reserved ASCII
# characters and multi-byte UTF-8 characters and compares the result with the
# expected percent-encoding. On a mismatch it prints the input (plain and as
# hex), the actual and the expected output, then exits the shell with status 1.
_test_urlencode() {
  local fname=urlencode
  local auml=$'\xC3\xA4' # ä = %C3%A4
  local euro=$'\xE2\x82\xAC' # € = %E2%82%AC
  local tick=$'\x60' # ` = %60
  local backtick=$'\xC2\xB4' # ´ = %C2%B4
  local input="a:/b c?d=e&f#g-+-;-,-@-\$-!-*-'-(-)-#-$tick-$backtick-$auml-$euro"
  # note: we expect uppercase hex codes from %02X format string
  local expected="a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%3B-%2C-%40-%24-%21-%2A-%27-%28-%29-%23-%60-%C2%B4-%C3%A4-%E2%82%AC" # also encode ;,/?:@&=+$!*'()#
  #local expected="a:/b%20c?d=e&f#g-+-;-,-@-\$-!-*-'-(-)-#-%60-%C2%B4-%C3%A4-%E2%82%AC" # dont encode ;,/?:@&=+$!*'()#
  # Declared separately from the assignment so that `local` does not hide the
  # exit status of the function under test.
  local actual
  actual="$("$fname" "$input")"
  if [[ "$actual" != "$expected" ]]; then
    echo "error in $fname"
    # debug
    echo "input: $input"
    echo "input hex:"; printf '%s' "$input" | hexdump -v -e '/1 "%02X"' | sed 's/\(..\)/\\x\1/g'; echo
    echo "input hexdump:"; printf '%s' "$input" | hexdump -C
    printf "actual:   "; echo "$actual"
    printf "expected: "; echo "$expected"
    exit 1
  fi
}

ThePredators commented Sep 7, 2022

This works for me.

# rawurlencode <string>
# Percent-encode <string>: every byte outside [-_.~a-zA-Z0-9] becomes %xx
# (lowercase hex). The result is printed on stdout followed by a newline and
# is also stored in the global variable REPLY.
rawurlencode() {
  # Work on bytes, not characters, so that multi-byte UTF-8 input is encoded
  # as one %xx per byte. Must be set before the length is taken.
  local LC_ALL=C
  local string="${1}"
  local strlen=${#string}
  local encoded=""
  local pos c o code

  for (( pos=0 ; pos<strlen ; pos++ )); do
     c=${string:pos:1}
     case "$c" in
        [-_.~a-zA-Z0-9] ) o="${c}" ;;
        * )
           # The leading quote asks printf for the character's numeric value;
           # mask to 8 bits as a guard in case the C library reports a high
           # byte as a value above 0xFF.
           printf -v code '%d' "'$c"
           printf -v o '%%%02x' "$(( code & 0xFF ))" ;;
     esac
     encoded+="${o}"
  done
  # printf rather than echo, so that a result such as "-n" is printed and not
  # taken as an option.
  printf '%s\n' "${encoded}"    # You can either set a return variable (FASTER)
  REPLY="${encoded}"   #+or echo the result (EASIER)... or both... :p
}

milahu commented Sep 7, 2022

@ThePredators this breaks on unicode

input: a:/b c?d=e&f#g-+-`-´-ä-€
input hex:
input hexdump:
00000000  61 3a 2f 62 20 63 3f 64  3d 65 26 66 23 67 2d 2b  |a:/b c?d=e&f#g-+|
00000010  2d 60 2d c2 b4 2d c3 a4  2d e2 82 ac              |-`-..-..-...|
actual:   a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%60-%B4-%E4-%20AC
expected: a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%60-%C2%B4-%C3%A4-%E2%82%AC

see my updated answer

