Skip to content

Instantly share code, notes, and snippets.

@tlrobinson
Created July 10, 2011 06:47
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save tlrobinson/1074339 to your computer and use it in GitHub Desktop.
Save tlrobinson/1074339 to your computer and use it in GitHub Desktop.
Bash XML parser and stack, plus Gmail XML feed parsing example
#!/usr/bin/env bash
# Bash Stack
# A LIFO stack kept in the global indexed array `stack`, manipulated through
# the stack_* functions defined below.
# Let ERR traps be inherited by functions, command substitutions and subshells.
set -o errtrace
# Exit as soon as a command fails outside of a conditional context.
set -o errexit
# Treat the expansion of an unset variable as an error.
set -o nounset
# Backing storage for the stack; the last element is the top of the stack.
declare -a stack
# Push one value onto the top of the global `stack` array.
# Arguments: $1 - value to push (exactly one argument is required)
# Returns:   0 on success; 1 (after reporting through _stack_err) when the
#            number of arguments is not exactly one
stack_push () {
  [ $# -eq 1 ] || _stack_err "stack_push takes one argument" || return 1
  # Append after the highest used index rather than at index "element count":
  # the two differ when the array is sparse, and writing at the element count
  # would then overwrite a live element.
  stack+=("$1")
}
# Remove the top (most recently pushed) element from the global `stack` array.
# Arguments: none
# Returns:   0 on success; 1 (after reporting through _stack_err) when the
#            stack is empty
stack_pop () {
  # Declared separately so the exit status of _stack_index is not masked and
  # the caller's variables are left untouched.
  local top
  top=$(_stack_index) || _stack_err "index out of range" || return 1
  # The subscript is quoted so the shell cannot treat "stack[N]" as a glob
  # pattern and expand it to a matching file name such as "stack0".
  unset "stack[$top]"
}
# Print an element of the stack without removing it.
# Arguments: $1 - depth counted from the top (optional; 0, the default, is
#                 the top element, 1 the one below it, and so on)
# Outputs:   the selected element followed by a newline on stdout
# Returns:   0 on success; 1 (after reporting through _stack_err) when the
#            depth is outside the stack
stack_get () {
  local depth="${1:-0}"
  local slot
  slot=$(_stack_index "$depth") || _stack_err "index out of range" || return 1
  # printf with a quoted expansion prints the value verbatim: no word
  # splitting, no pathname expansion, and values such as "-n" are not taken
  # as options the way echo would take them.
  printf '%s\n' "${stack[$slot]}"
}
# Print the whole stack on one line, bottom element first, with the elements
# separated by single spaces.
# Arguments: none
# Outputs:   the joined elements followed by a newline (an empty line when
#            the stack is empty)
stack_list () {
  # "${stack[*]}" joins on the first character of IFS, so pin it to a space
  # for the duration of this call regardless of what the caller uses.
  local IFS=' '
  # Quoted so elements are printed verbatim (no globbing, no whitespace
  # collapsing); the "-" default keeps an empty stack from tripping nounset.
  printf '%s\n' "${stack[*]-}"
}
# Print the number of elements currently held in the global `stack` array,
# followed by a newline.
stack_size () {
  printf '%s\n' "${#stack[@]}"
}
# Internal helpers (prefixed with "_"; not part of the public stack_* interface):
# Translate a depth counted from the top of the stack into the matching index
# of the global `stack` array.
# Arguments: $1 - depth from the top (optional; defaults to 0, the top)
# Outputs:   the array index followed by a newline on stdout
# Returns:   0 on success; 1 without printing anything when the depth is not
#            a non-negative decimal integer smaller than the stack size
_stack_index () {
  local depth="${1-0}"
  local size="${#stack[@]}"
  # Accept plain decimal digits only; the 18-digit cap keeps the value inside
  # the shell's 64-bit arithmetic so it cannot wrap around.
  [[ "$depth" =~ ^[0-9]{1,18}$ ]] || return 1
  # Force base 10 so zero-padded input such as "08" is not parsed as octal.
  depth=$(( 10#$depth ))
  (( depth < size )) || return 1
  printf '%s\n' "$(( size - depth - 1 ))"
}
# Report a stack error on stderr and signal failure to the caller.
# Arguments: the words of the message; they are printed separated by spaces
#            after the "Stack error: " prefix
# Returns:   always 1, so it can be chained as `... || _stack_err msg || return 1`
_stack_err () {
  # "$*" joins on the first character of IFS; pin it to a space for this call.
  local IFS=' '
  printf '%s\n' "Stack error: $*" >&2
  return 1
}
#!/usr/bin/env bash
# Bash XML parser (SAX)
#
# EXAMPLE: echo '<foo><bar>hello world</bar></foo>' | ./bash-xml.bash 'echo START:' 'echo END:' 'echo TEXT:'
#
# Shell options for this script:
# errtrace - ERR traps are inherited by functions, command substitutions and subshells
set -o errtrace
# errexit  - exit as soon as a command fails outside of a conditional context
set -o errexit
# nounset  - expanding an unset variable is an error
set -o nounset
# Minimal SAX-style XML driver: reads a document from stdin, breaks it into
# one tag or text run per line with bash_xml_split, and invokes a handler
# command for each of them.
# Arguments:
#   $1 - command run for every opening tag     (optional; empty = ignore)
#   $2 - command run for every closing tag     (optional; empty = ignore)
#   $3 - command run for every run of text     (optional; empty = ignore)
# Handler invocation:
#   tag handlers  : <handler> <raw tag line> <element name>
#   text handler  : <handler> <text>
#   A self-closing tag ("<x/>") triggers the start handler and then the end
#   handler. Lines starting with "<?" (e.g. the XML declaration) are skipped.
# Notes:
#   Handlers are expanded unquoted on purpose so that a string such as
#   'echo START:' is split into a command plus leading arguments.
#   The loop is the last stage of a pipeline, so handlers run in a subshell.
bash_xml_sax_parse () {
  # Defaults make omitted handlers behave like empty ones under nounset.
  local handle_element_start="${1-}"
  local handle_element_end="${2-}"
  local handle_characters="${3-}"
  # "<", optional slashes, then the name up to a space, "/" or ">".
  local tag_pattern='^</*([^ />]*)'
  local line name
  # -r keeps backslashes in the text intact; IFS is left at its default so
  # surrounding whitespace is still trimmed from every line.
  bash_xml_split | while read -r line; do
    name=""
    if [[ "$line" =~ $tag_pattern ]]; then
      name="${BASH_REMATCH[1]}"
    fi
    case "$line" in
      "<?"*)
        ;;
      "</"*)
        [ -z "$handle_element_end" ] || $handle_element_end "$line" "$name"
        ;;
      "<"*"/>")
        [ -z "$handle_element_start" ] || $handle_element_start "$line" "$name"
        [ -z "$handle_element_end" ] || $handle_element_end "$line" "$name"
        ;;
      "<"*)
        [ -z "$handle_element_start" ] || $handle_element_start "$line" "$name"
        ;;
      *)
        [ -z "$handle_characters" ] || $handle_characters "$line"
        ;;
    esac
  done
}
# Filter (stdin -> stdout) that rewrites an XML document so every tag and
# every run of text sits on a line of its own, then drops lines that are
# empty or contain only spaces.
# TODO: make this more robust
bash_xml_split () {
  local nl=$'\n'
  # First sed: break the line before each "<" and after each ">" (the
  # replacement is a backslash followed by a literal newline).
  # Second sed: runs on the re-split stream and deletes the blank lines the
  # first pass leaves behind.
  sed -e "s/</\\${nl}</g" -e "s/>/>\\${nl}/g" \
    | sed -e '/^ *$/d'
}
# Act as a command-line filter only when this file is executed directly;
# nothing runs here when the file is sourced by another script.
# The handler commands are taken from the command-line arguments.
# NOTE: bash_xml_sax_parse pipes its input through bash_xml_split as well, so
# the document goes through the splitter twice on this path.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  bash_xml_split | bash_xml_sax_parse "$@"
fi
#!/usr/bin/env bash
# Gmail inbox feed parser
#
# EXAMPLE: ./gmail.bash GMAIL_USER
#
# GMAIL_USER is passed to `curl -u`, so it is either `username` or `username:password`
# errtrace - ERR traps are inherited by functions, command substitutions and subshells
set -o errtrace
# errexit  - exit as soon as a command fails outside of a conditional context
set -o errexit
# nounset  - expanding an unset variable is an error
set -o nounset
# Directory this script lives in; the helper libraries are loaded from there.
parent=$(dirname -- "$BASH_SOURCE")
# Provides bash_xml_sax_parse.
source "$parent/bash-xml.bash"
# Provides stack_push, stack_pop and stack_list.
source "$parent/bash-stack.bash"
# Parse the feed XML arriving on stdin by running it through
# bash_xml_sax_parse with the gmail_parse_feed_* functions as the start-tag,
# end-tag and text handlers.
gmail_parse_feed () {
  local -a callbacks=(
    gmail_parse_feed_element_start
    gmail_parse_feed_element_end
    gmail_parse_feed_characters
  )
  bash_xml_sax_parse "${callbacks[@]}"
}
# Start-tag handler for the feed parser.
# Arguments: $1 - raw tag line (unused here)
#            $2 - element name
# Effects:   on an "entry" element the per-entry fields gmail_entry_name,
#            gmail_entry_title and gmail_entry_date are cleared; the element
#            name is then pushed onto the stack to track the current path.
gmail_parse_feed_element_start () {
  if [ "$2" = "entry" ]; then
    # Plain assignments in the current shell: inside a ( ... ) subshell the
    # resets would be thrown away, letting fields from the previous entry
    # leak into this one.
    gmail_entry_name=""
    gmail_entry_title=""
    gmail_entry_date=""
  fi
  stack_push "$2"
}
# End-tag handler for the feed parser.
# Arguments: $1 - raw tag line (unused here)
#            $2 - element name
# Outputs:   when an "entry" element closes, one line of the form
#            "<date, right-aligned to 20> | <name, left-aligned to 25> | <title>"
# Effects:   pops the element off the stack in every case.
gmail_parse_feed_element_end () {
  if [ "$2" = "entry" ]; then
    # The "-" defaults print an empty column instead of aborting under
    # nounset when an entry never supplied one of the fields.
    printf "%20s | %-25s | %s\n" \
      "${gmail_entry_date-}" "${gmail_entry_name-}" "${gmail_entry_title-}"
  fi
  stack_pop
}
# Text handler for the feed parser.
# Arguments: $1 - the text found inside the current element
# Effects:   depending on the current element path (the output of
#            stack_list), stores the text in one of the per-entry fields:
#              "feed entry title"       -> gmail_entry_title
#              "feed entry issued"      -> gmail_entry_date
#              "feed entry author name" -> gmail_entry_name
#            Text found anywhere else is ignored.
gmail_parse_feed_characters () {
  local path
  # "|| :" keeps a failing stack_list from aborting the script under errexit.
  path=$(stack_list) || :
  if [ "$path" = "feed entry title" ]; then
    gmail_entry_title="$1"
  elif [ "$path" = "feed entry issued" ]; then
    gmail_entry_date="$1"
  elif [ "$path" = "feed entry author name" ]; then
    gmail_entry_name="$1"
  fi
}
# Download the inbox feed and write it to stdout.
# Arguments: $1 - value handed to curl's -u option
# Notes:     curl runs with --silent.
gmail_fetch_feed () {
  local credentials="$1"
  local feed_url='https://mail.google.com/mail/feed/atom'
  curl -u "$credentials" --silent "$feed_url"
}
# Fetch the inbox feed and print its entries.
# Arguments: $1 - user specification (optional); when given it replaces the
#                 global GMAIL_USER, otherwise the existing GMAIL_USER is used
# Outputs:   whatever gmail_parse_feed prints for the fetched feed
gmail_inbox () {
  # Only override the global when an argument was actually supplied.
  [ $# -eq 0 ] || GMAIL_USER="$1"
  gmail_fetch_feed "$GMAIL_USER" \
    | gmail_parse_feed
}
# Run gmail_inbox with the command-line arguments only when this file is
# executed directly; nothing runs here when the file is sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  gmail_inbox "$@"
fi
#!/usr/bin/env bash
# XML prettifier
#
# EXAMPLE: echo '<foo><bar>hello world</bar></foo>' | ./xml-prettify.bash
#
# errtrace - ERR traps are inherited by functions, command substitutions and subshells
set -o errtrace
# errexit  - exit as soon as a command fails outside of a conditional context
set -o errexit
# nounset  - expanding an unset variable is an error
set -o nounset
# Directory this script lives in; the helper libraries are loaded from there.
parent=$(dirname -- "$BASH_SOURCE")
# Provides bash_xml_sax_parse.
source "$parent/bash-xml.bash"
# Provides stack_push, stack_pop and stack_size.
source "$parent/bash-stack.bash"
# Number of spaces of indentation added per level of element nesting.
xml_prettify_indent="2"
# Print one line of output indented according to the current nesting depth.
# Arguments: $1 - text to print
# Globals:   xml_prettify_indent (read) - spaces per nesting level
# Outputs:   (stack_size * xml_prettify_indent) spaces, then $1 and a newline
xml_prettify_print () {
  # Shell arithmetic replaces a call to expr for every printed line.
  local width=$(( $(stack_size) * xml_prettify_indent ))
  # "%*s" with an empty string pads out to `width` spaces.
  printf "%*s%s\n" "$width" "" "$1"
}
# Pretty-print the XML arriving on stdin by running it through
# bash_xml_sax_parse with the xml_prettify_* functions as the start-tag,
# end-tag and text handlers.
xml_prettify () {
  local -a callbacks=(
    xml_prettify_start
    xml_prettify_end
    xml_prettify_characters
  )
  bash_xml_sax_parse "${callbacks[@]}"
}
# Start-tag handler for the prettifier.
# Arguments: $1 - raw tag line
#            $2 - element name
# Effects:   prints the tag at the current depth, then pushes the element
#            name so that everything nested inside is indented one level more.
xml_prettify_start () {
  # HACK: a self-terminating tag ("<x/>") is not printed here; it is still
  # pushed below.
  if [[ "$1" != *"/>" ]]; then
    xml_prettify_print "$1"
  fi
  stack_push "$2"
}
# End-tag handler for the prettifier.
# Arguments: $1 - raw tag line
# Effects:   pops the element first, so the tag is printed at the same depth
#            as the tag that opened it, then prints the line.
xml_prettify_end () {
  stack_pop
  local tag_line="$1"
  xml_prettify_print "$tag_line"
}
# Text handler for the prettifier.
# Arguments: $1 - the text found inside the current element
# Effects:   prints the text at the current nesting depth.
# Returns:   0
xml_prettify_characters () {
  local text="$1"
  xml_prettify_print "$text"
  return 0
}
# Run the prettifier on stdin only when this file is executed directly;
# nothing runs here when the file is sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  xml_prettify
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment