urlencode() { | |
# urlencode <string> | |
old_lc_collate=$LC_COLLATE | |
LC_COLLATE=C | |
local length="${#1}" | |
for (( i = 0; i < length; i++ )); do | |
local c="${1:$i:1}" | |
case $c in | |
[a-zA-Z0-9.~_-]) printf '%s' "$c" ;; | |
*) printf '%%%02X' "'$c" ;; | |
esac | |
done | |
LC_COLLATE=$old_lc_collate | |
} | |
urldecode() { | |
# urldecode <string> | |
local url_encoded="${1//+/ }" | |
printf '%b' "${url_encoded//%/\\x}" | |
} |
Looks good. Shellcheck complains about the printf printing the character directly. Easily solvable to be shellcheck-clean:
< [a-zA-Z0-9.~_-]) printf "%s" "$c" ;;
---
> [a-zA-Z0-9.~_-]) printf "$c" ;;
Great script! Thanks!
For fish users
function urlencode
set str (string join ' ' $argv)
for c in (string split '' $str)
if string match -qr '[a-zA-Z0-9.~_-]' $c
env LC_COLLATE=C printf "$c"
else
env LC_COLLATE=C printf '%%%02X' "'$c"
end
end
end
function urldecode
set url_encoded (string replace -a '+' ' ' $argv[1])
printf '%b' (string replace -a '%' '\\x' $url_encoded)
end
urlencode() {
# urlencode <string>
old_lc_collate=$LC_COLLATE
LC_COLLATE=C
local length="${#1}"
for (( i = 0; i < length; i++ )); do
local c="${1:i:1}"
case $c in
[a-zA-Z0-9.~_-]) printf "$c" ;;
' ') printf "%%20" ;;
*) printf '%%%02X' "'$c" ;;
esac
done
LC_COLLATE=$old_lc_collate
}
paxsalis version works with bash like charm, but not with bourne (
That snippet worked with bourne
urlencode() {
# urlencode <string>
old_lc_collate=$LC_COLLATE
LC_COLLATE=C
local i=1
local length="${#1}"
while [ $i -le $length ]
do
local c=$(echo "$(expr substr $1 $i 1)")
case $c in
[a-zA-Z0-9.~_-]) printf "$c" ;;
' ') printf "%%20" ;;
*) printf '%%%02X' "'$c" ;;
esac
i=`expr $i + 1`
done
LC_COLLATE=$old_lc_collate
}
On zsh I was getting unrecognized modifier 'i'
until I changed the following line:
- local c="${1:i:1}"
+ local c="${1:$i:1}"
amazing solution. thanks
On zsh I was getting
unrecognized modifier 'i'
until I changed the following line:- local c="${1:i:1}" + local c="${1:$i:1}"
@cdown, I think this should be replaced on your gist.
urldecode "abc%40abc.com" returns "abc%40abc.com".
Not working.
It works just fine.
$ urldecode() {
> # urldecode <string>
>
> local url_encoded="${1//+/ }"
> printf '%b' "${url_encoded//%/\\x}"
> }
$ urldecode "abc%40abc.com"
abc@abc.com
I've made a small screencast. Would you mind watching this, please?
https://www.dropbox.com/s/dul4wipk59o2ttk/urldecode_not_working_gist_1163649.webm?dl=0
No, It's your own gist. I've just modified a line 'local c="${1:$i:1}"' according to @krin-san. I did only because I was getting "unrecognized modifier 'i'" error on zsh.
Lovely! Thank you all!
@rajeshisnepali for urldecode to work with my zsh:
In zsh ${url_encoded//%/\\x}
adds a \x to the end but ${url_encoded//\%/\\x}
replaces % with \x.
lri - https://unix.stackexchange.com/questions/159253/decoding-url-encoding-percent-encoding
also, in urlencode, $i could be made local
local i length="${#1}"
@mountaineerbr Thanks for the information but I couldn't make the above script work.
But it worked using an alias from the python3 script 👍 (from the link above).
I'm seeing strings with 2 or more consecutive spaces get shrunk to 1 space. So %20%20 or ' '
changes to only ' '. Any thoughts on how to not truncate these spaces?
@cjplay02 I'm pretty sure the issue is elsewhere.
$ urldecode() {
# urldecode <string>
local url_encoded="${1//+/ }"
printf '%b' "${url_encoded//%/\\x}"
}
$ urldecode 'foo%20%20%20%20%20bar'
foo bar
Good call @cdown. I had the urldecode call in a command substitution - urldecoded=$(urldecode 's3://...')
. Once I removed the function call from the command substitution, the spaces were retained from the encoding. Now I just need to find a better way to declare the result as a variable...
Edit. Double Quoting around the variable's presentation in downstream commands fixed my issue. Ie echo "$varname"
just a brief nod to mawk
which is five times faster in my tests
(indeed, often faster than sed)
I know it's not a de facto standard like bash
i.e. installed by default on so many systems
but it should be and it is on my systems
I also notice that bash seems to be catching up with ksh93
One line implementation, suitable for storing in .bashrc
urle () { [[ "${1}" ]] || return 1; local LANG=C i x; for (( i = 0; i < ${#1}; i++ )); do x="${1:i:1}"; [[ "${x}" == [a-zA-Z0-9.~_-] ]] && echo -n "${x}" || printf '%%%02X' "'${x}"; done; echo; }
urld () { [[ "${1}" ]] || return 1; : "${1//+/ }"; echo -e "${_//%/\\x}"; }
Thanks for it!
Thanks for this.
Could you please also license this code of yours?
Thanks for the script, but i don't know why when calling urlencode i got in the encoded data a : % at the end !
i had to add a check for systems where collate is not set
if [ -n "$old_lc_collate" ] ; then LC_COLLATE=$old_lc_collate ; fi
LC_ALL=C
is needed to support unicode = loop bytes, not characters.
LC_COLLATE=C
or LANG=C
do not work.
this also must be set before ${#1}
to get the length of $1
in bytes
#!/usr/bin/env bash
# MIT License
# encode special characters per RFC 3986
urlencode() {
local LC_ALL=C # support unicode = loop bytes, not characters
local c i n=${#1}
for (( i=0; i<n; i++ )); do
c="${1:i:1}"
case "$c" in
[-_.~A-Za-z0-9]) # also encode ;,/?:@&=+$!*'()# == encodeURIComponent in javascript
#[-_.~A-Za-z0-9\;,/?:@\&=+\$!*\'\(\)#]) # dont encode ;,/?:@&=+$!*'()# == encodeURI in javascript
printf '%s' "$c" ;;
*) printf '%%%02X' "'$c" ;;
esac
done
echo
}
_test_urlencode() {
local fname=urlencode
local auml=$'\xC3\xA4' # ä = %C3%A4
local euro=$'\xE2\x82\xAC' # € = %E2%82%AC
local tick=$'\x60' # ` = %60
local backtick=$'\xC2\xB4' # ´ = %C2%B4
local input="a:/b c?d=e&f#g-+-;-,-@-\$-!-*-'-(-)-#-$tick-$backtick-$auml-$euro"
# note: we expect uppercase hex codes from %02X format string
local expected="a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%3B-%2C-%40-%24-%21-%2A-%27-%28-%29-%23-%60-%C2%B4-%C3%A4-%E2%82%AC" # also encode ;,/?:@&=+$!*'()#
#local expected="a:/b%20c?d=e&f#g-+-;-,-@-\$-!-*-'-(-)-#-%60-%C2%B4-%C3%A4-%E2%82%AC" # dont encode ;,/?:@&=+$!*'()#
local actual="$($fname "$input")"
if [[ "$actual" != "$expected" ]]; then
echo "error in $fname"
# debug
echo "input: $input"
echo "input hex:"; echo -n "$input" | hexdump -v -e '/1 "%02X"' | sed 's/\(..\)/\\x\1/g'; echo
echo "input hexdump:"; echo -n "$input" | hexdump -C
printf "actual: "; echo "$actual"
printf "expected: "; echo "$expected"
exit 1
fi
}
_test_urlencode
This works for me.
https://stackoverflow.com/questions/296536/how-to-urlencode-data-for-curl-command
rawurlencode() { local string="${1}" local strlen=${#string} local encoded="" local pos c o for (( pos=0 ; pos<strlen ; pos++ )); do c=${string:$pos:1} case "$c" in [-_.~a-zA-Z0-9] ) o="${c}" ;; * ) printf -v o '%%%02x' "'$c" esac encoded+="${o}" done echo "${encoded}" # You can either set a return variable (FASTER) REPLY="${encoded}" #+or echo the result (EASIER)... or both... :p }
@ThePredators this breaks on unicode
input: a:/b c?d=e&f#g-+-`-´-ä-€
input hex:
\x61\x3A\x2F\x62\x20\x63\x3F\x64\x3D\x65\x26\x66\x23\x67\x2D\x2B\x2D\x60\x2D\xC2\xB4\x2D\xC3\xA4\x2D\xE2\x82\xAC
input hexdump:
00000000 61 3a 2f 62 20 63 3f 64 3d 65 26 66 23 67 2d 2b |a:/b c?d=e&f#g-+|
00000010 2d 60 2d c2 b4 2d c3 a4 2d e2 82 ac |-`-..-..-...|
0000001c
actual: a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%60-%B4-%E4-%20AC
expected: a%3A%2Fb%20c%3Fd%3De%26f%23g-%2B-%60-%C2%B4-%C3%A4-%E2%82%AC
@ThePredators works like a charm 👍
Hi,
Characters used in France are not taken into account: (é è à ù ê â û ...) if you work in fr_FR locale.
You need to convert your data source from Windows-1252 to UTF-8 before entering in the function ::
data_utf8=$(echo "$data_ISO" | iconv -f iso8859-1 -t utf-8)
have a little problem with Chinese Character.
this solution below using curl command to encode url can work with Chinese Character.
function urlencode() {
if [[ $# != 1 ]]; then
echo "Usage: $0 string-to-urlencode"
return 1
fi
local data="$(curl -s -o /dev/null -w %{url_effective} --get --data-urlencode "$1" "")"
if [[ $? == 0 ]]; then
echo "${data##/?}"
fi
return 0
}
https://gist.github.com/westfly/ed7e25ee4353751d94132f92837a7074