#!/bin/sh
# ddg.sh - Search the internet with DuckDuckGo from your terminal
# Made with boredom, on Christmas Eve, by mftrhu
# Requirements:
# awk, curl OR wget, sed, head, tail, grep, tput (MAYBE)
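# Example (hypothetical invocations, assuming the script is saved as
# ddg.sh and made executable):
#   ./ddg.sh posix shell scripting        # print the first page of results
#   ./ddg.sh -p 2 posix shell scripting   # print the second page
#   ./ddg.sh -l posix shell scripting     # open the first result in the browser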
# available CMD - check whether CMD is installed
available () {
  command -v "$1" >/dev/null 2>&1
}
# split SEP N - print the Nth field of stdin, split on SEP
split () {
  awk -F "$1" "{print \$$2}" -
}
# between A B - print the text lying between the delimiters A and B
between () {
  split "$1" 2 | split "$2" 1
}
# bold - turn <b>...</b> spans into terminal bold
bold () {
  sed -r -e "s|<b>|$(tput bold)|g" \
         -e "s|</b>|$(tput sgr0)|g"
}
# urldecode - decode %XX escapes (needs gawk for RT, chr() and `-i ord`)
urldecode () {
  awk -niord '{ printf RT?$0chr("0x"substr(RT, 2)):$0 }' RS=%..
}
# urlencode - encode a query string for DuckDuckGo
urlencode () {
  #TODO: actually escape more than just spaces here
  sed -e 's/ /\+/g'
}
# htmlstrip - drop simple HTML tags
htmlstrip () {
  sed -r -e "s|</?[a-zA-Z]+>||g"
}
# htmldecode - decode a few common HTML entities
htmldecode () {
  sed -e 's/&quot;/"/g' -e "s/&#x27;/'/g" -e 's/&amp;/\&/g' # | \
  # awk -niord '{ printf RT?$0chr("0x"substr(RT, 4)):$0 }' RS='&#x..;'
  #BUG: awk gives up the ghost, sometimes, first seen with `-p 4 bah`
}
usage () {
  printf "Usage: %s [-hlduqcn] [-g N] [-p P] QUERY...\n" "$0"
  printf "Performs a DuckDuckGo search and prints the results.\n" | fmt -w "$cols"
  printf "\n"
  printf "DuckDuckGo bangs are (mostly) unsupported, and will just be opened in the browser.\n" | fmt -w "$cols"
  printf "\n"
  printf "  -h     shows this help and quits\n"
  printf "  -l     opens the first result in the browser\n"
  printf "  -g N   opens the Nth result in the browser\n"
  printf "  -p P   shows the Pth page of results\n"
  printf "  -u     prints only the result URLs\n"
  printf "  -q     does not print the descriptions\n"
  printf "  -d     dumps the results as tab-separated values\n"
  printf "  -c     enable colors (requires tput)\n"
  printf "  -n     disable colors\n"
  exit "$1"
}
error () {
  printf "${bold}${error_c}Error:${reset} %s.\n" "$2" >&2
  if [ -n "$3" ]; then
    printf "\n%s\n" "$3" >&2
  fi
  exit "$1"
}
download () {
  if available curl; then
    curl "$1" 2>/dev/null; return $?
  elif available wget; then
    wget "$1" -q -O-; return $?
  fi
  error 4 "no downloader installed" \
    "Install curl or wget before proceeding."
}
pager () {
  if [ -n "$DDG_PAGER" ] && available "$DDG_PAGER"; then
    "$DDG_PAGER"; return $?
  elif [ -n "$PAGER" ] && available "$PAGER"; then
    "$PAGER"; return $?
  elif available less; then
    less -r; return $?
  fi
  cat; return $?
}
browser () {
  if [ -n "$DDG_BROWSER" ] && available "$DDG_BROWSER"; then
    "$DDG_BROWSER" "$1"; return $?
  elif [ -n "$BROWSER" ] && available "$BROWSER"; then
    "$BROWSER" "$1"; return $?
  elif available lynx; then
    lynx "$1"; return $?
  elif available w3m; then
    w3m "$1"; return $?
  fi
  error 4 "no browser installed" \
    "Install a browser before proceeding."
}
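# Example (hypothetical): the pager and the browser can be overridden
# from the environment, falling back on $PAGER and $BROWSER:
#   DDG_PAGER=most DDG_BROWSER=firefox ./ddg.sh shell scripting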
cache_dir="$HOME/.cache/ddg"
purge_cache () {
  rm -f "${cache_dir}/query"
  rm -f "${cache_dir}/raw"
  rm -f "${cache_dir}/urls"
  rm -f "${cache_dir}/titles"
  rm -f "${cache_dir}/desc"
}
check_age () {
  #TODO
  true
}
# check_cache - reuse the cached results when the query has not changed,
# purging them otherwise
check_cache () {
  mkdir -p "${cache_dir}"
  if [ -r "${cache_dir}/query" ]; then
    old_query=$(cat "${cache_dir}/query")
    if [ -z "$query" ] || [ "${query}" = "${old_query}" ]; then
      check_age
      if [ ! -s "${cache_dir}/urls" ]; then
        purge_cache
        error 2 "no cached search results" \
          "Try again with a non-empty QUERY."
      fi
    else
      purge_cache
    fi
  else
    if [ -z "$query" ]; then
      error 2 "no cached search results" \
        "Try again with a non-empty QUERY."
    fi
    purge_cache
  fi
}
# clean_up - cut away the page header and any sponsored results, keeping
# only the organic results in ${cache_dir}/raw
clean_up () {
  line_cut=$(grep -n "${cache_dir}/tmp" -e "result-sponsored" | \
    cut -d ':' -f 1 | \
    tail -1)
  [ -z "$line_cut" ] && line_cut=$(grep -n "${cache_dir}/tmp" -e "</style>" | \
    cut -d ':' -f 1)
  tail -n +"${line_cut}" "${cache_dir}/tmp" > "${cache_dir}/raw"
  rm -f "${cache_dir}/tmp"
}
extract_urls () {
  grep "${cache_dir}/raw" -e "result-link" | \
    cut -d '"' -f 4 | \
    cut -d '=' -f 3 | \
    urldecode >> "${cache_dir}/urls"
}
extract_titles () {
  grep "${cache_dir}/raw" -e "result-link" | \
    cut -d ">" -f 2- | \
    split "</a>" 1 | \
    htmlstrip | \
    htmldecode >> "${cache_dir}/titles"
}
extract_descriptions () {
  grep "${cache_dir}/raw" -e "result-snippet" -A 2 | \
    grep -v -e "<td.*>" -e "</td>" -e "--" | \
    sed 's/^ \+//g' | \
    bold | \
    htmldecode >> "${cache_dir}/desc"
}
collate () {
  paste "${cache_dir}/titles" "${cache_dir}/urls" "${cache_dir}/desc"
}
# pull_results - keep fetching pages of results until enough of them
# have been cached to satisfy the requested page (or result index)
pull_results () {
  results_wanted=$((page * page_size))
  [ "$index" -gt "$results_wanted" ] && results_wanted="$index"
  if [ -r "${cache_dir}/urls" ]; then
    len=$(wc -l "${cache_dir}/urls" | cut -d ' ' -f 1)
  else
    len=0
  fi
  while [ "$len" -lt "$results_wanted" ]; do
    download "https://duckduckgo.com/lite?q=${query}&sc=${len}" > "${cache_dir}/tmp"
    if [ $? -ne 0 ]; then
      error 3 "no connection to the internet" \
        "Check your connection and try again."
    fi
    clean_up
    extract_urls
    extract_titles
    extract_descriptions
    len=$(wc -l "${cache_dir}/urls" | cut -d ' ' -f 1)
  done
}
paginate () {
  tail -n +"$(( ( page - 1 ) * page_size + 1 ))" | \
    head -n "$(( page_size ))"
}
do_jump () {
  url=$(tail "${cache_dir}/urls" -n +"$1" | head -1)
  browser "$url"
  exit 0
}
do_urls () {
  paginate < "${cache_dir}/urls"
  exit 0
}
do_dump () {
  collate | paginate
  exit 0
}
do_show () {
  counter="$(( (page - 1) * page_size ))"
  collate | paginate | while read -r line; do
    url="$(echo "$line" | cut -f 2)"
    title="$(echo "$line" | cut -f 1 | fmt -s -w "$width")"
    counter=$((counter + 1))
    echo "$title" | while read -r l; do
      if [ -z "$n" ] ; then
        printf "[%3d] ${bold}${title_c}%s${reset}\n" "$counter" "$l"
      else
        printf "      ${bold}${title_c}%s${reset}\n" "$l"
      fi
      n=$((n + 1))
    done
    printf "      ${url_c}<%s>${reset}\n" "$url"
    if $descriptions; then
      desc="$(echo "$line" | cut -f 3 | fmt -w "$width")"
      echo "$desc" | while read -r l; do
        printf "      %s\n" "$l"
      done
    fi
  done | pager
  exit 0
}
# check_query - handle DuckDuckGo !bangs: !g/!google is special-cased,
# everything else is just opened in the browser
check_query () {
  first_char=$(echo "$query" | cut -c 1)
  if [ "${first_char}" = "!" ]; then
    accel=$(echo "$query" | cut -d ' ' -f 1)
    rest=$(echo "$query" | cut -d ' ' -f 2-)
    case "${accel}" in
      (!g|!google) ./google.sh "${rest}" ;;
      (*) browser "https://duckduckgo.com/lite?q=${query}" ;;
    esac
    exit 0
  fi
}
color () {
  case "$1" in
    (black)   tput setaf 0 ;;
    (red)     tput setaf 1 ;;
    (green)   tput setaf 2 ;;
    (yellow)  tput setaf 3 ;;
    (blue)    tput setaf 4 ;;
    (magenta) tput setaf 5 ;;
    (cyan)    tput setaf 6 ;;
    (white)   tput setaf 7 ;;
  esac
}
init_colors () {
  [ -z "$DDG_TITLE_COLOR" ] && title_c="yellow" || title_c="$DDG_TITLE_COLOR"
  [ -z "$DDG_URL_COLOR" ] && url_c="blue" || url_c="$DDG_URL_COLOR"
  title_c=$(color "$title_c")
  url_c=$(color "$url_c")
  error_c=$(color "red")
  bold=$(tput bold)
  reset=$(tput sgr0)
}
init_vars () {
  cols=80
  available tput && cols=$(tput cols)
  width=$(( cols - 6 - 4 ))
  page_size="$DDG_PAGE_SIZE"
  [ -z "$DDG_PAGE_SIZE" ] && page_size=10
  colors="$DDG_COLORS"
  [ -z "$DDG_COLORS" ] && colors=true
  secret_plan="show"
  descriptions=true
  index=0
  page=1
  error_c=""
  title_c=""
  url_c=""
  bold=""
  reset=""
}
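# Example (hypothetical): the defaults above can be overridden from the
# environment, e.g.
#   DDG_PAGE_SIZE=5 DDG_TITLE_COLOR=green DDG_URL_COLOR=cyan ./ddg.sh awk one-liners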
init_vars
while getopts "hlduqg:p:nc" o; do
  case "${o}" in
    (d) secret_plan="dump" ;;
    (l) secret_plan="jump"
        index=1 ;;
    (g) secret_plan="jump"
        index="$OPTARG" ;;
    (p) page="$OPTARG" ;;
    (q) descriptions=false ;;
    (u) secret_plan="urls" ;;
    (n) colors=false ;;
    (c) colors=true ;;
    (h) usage 0 ;;
    (*) usage 1 ;;
  esac
done
shift $((OPTIND - 1))
query=$(printf '%s' "$*" | urlencode)
if "$colors" && available tput; then
  init_colors
fi
check_query
check_cache
printf '%s' "$query" > "${cache_dir}/query"
pull_results
case "${secret_plan}" in
(dump) do_dump ;;
(jump) do_jump "$index" ;;
(urls) do_urls ;;
(show) do_show ;;
esac