Skip to content

Instantly share code, notes, and snippets.

@dvoiss
Created March 19, 2012 07:37
Show Gist options
  • Save dvoiss/2101102 to your computer and use it in GitHub Desktop.
Save dvoiss/2101102 to your computer and use it in GitHub Desktop.
Made this for a friend (never quite finished it). Generates an XML feed for a directory of podcast files: reads ID3v1 tags (ID3v2 would require external libraries), runs AppleScript to open iTunes and load the feed for testing, and serves the feed over HTTP (iTunes required).
#!/bin/bash
# generate an xml podcast file for a directory of files
#
# I parse id3v1 tags but only for the artist and track title,
# most of the samples did not have reliable meta-data, so the
# file-name is used.
#
# I don't process id3v2 tags because an external library would be needed
# such as id3lib.
#
# requires python or ruby to be installed
# USAGE:
# Print the help text to stdout. Each paragraph is written as one logical
# line (backslash-newline is a continuation inside an unquoted here-doc),
# so the terminal does the wrapping.
print_usage() {
  cat <<EOF

Usage: $0 [options]

Run this script in a directory of mp3s you want to generate \
a podcast feed for. If you have multiple folders run the script at \
the top level to generate a feed for the items in the sub-directories.

Note that certain assumptions are made, such as the file-name \
being used for the track title and artist, id3v1 tags are read \
for these when available. Due to unreliable metadata id3v1 tags \
aren't read to retrieve other attributes, id3v2 tags are not read \
at all as an external dependency such as id3lib would be required.

== Options

Pass the string "tag" to try to use id3v1 tags. \
Depending on the source of the mp3 files, many will not have reliable \
data. Try generating the feed with "tag" and determine whether it \
is any good.
EOF
}

# [[ a || b ]] replaces the obsolescent and ambiguous `[ a -o b ]` form.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  print_usage
  exit 0
fi
# Locate the interpreters the script may use to escape HTML entities which
# can't be in the xml file. `command -v` is the shell's own lookup and,
# unlike the external `which`, behaves the same on every platform.
RUBY=$(command -v ruby)
PYTHON=$(command -v python)
if [[ -z "${PYTHON}" && -z "${RUBY}" ]]; then
  # Diagnostics go to stderr and the script fails with a non-zero status so
  # that callers can tell the feed was not generated.
  echo "ERROR:" >&2
  echo "You need to have Python or Ruby installed to use this script." >&2
  exit 1
fi
# script vars
# ===========
FILENAME="podcast.xml"
URL="http://localhost:8000/"
artist=''
image=''
result=''
id3v1=false
id3v1_extended=false
# RFC 2822 date for <pubDate>: the day of the month comes BEFORE the month
# name (e.g. "Mon, 19 Mar 2012 07:37:00 +0000"). LC_ALL=C keeps the day and
# month abbreviations in English regardless of the user's locale.
date_to_use=$(LC_ALL=C date "+%a, %d %b %Y %H:%M:%S %z")
# do you want to try to read id3v1 tags?
if [[ "${1:-}" == "tag" ]]; then
  use_id3v1=true
else
  use_id3v1=false
fi
# current directory
# Prints the last path component of the working directory, followed by a
# newline. printf '%s' is used instead of `echo -e` so that backslashes in
# a directory name are printed verbatim rather than interpreted as escapes.
function get_dir() {
  printf '%s\n' "${PWD##*/}"
}
# Build the URL under $URL at which a file is served.
#   $1 - path of the file; only its basename is used
#   $2 - pass "root" for files in the top-level directory; for any other
#        value the current directory name is inserted before the file name
# Spaces in the result are replaced by %20 (no other character is encoded).
function local_url() {
  local prefix=''
  local leaf
  [[ "$2" == "root" ]] || prefix="$(get_dir)/"
  leaf=$(basename "$1")
  echo -e "${URL}${prefix}${leaf}" | sed -e 's/ /%20/g'
}
# readlink -f emulation
# Prints "$1" as an absolute path: the containing directory is resolved by
# cd-ing into it inside a subshell, then the file name is appended.
# pwd is called without -P, so symlinked directories keep their logical path.
# Returns non-zero (printing nothing) when the directory cannot be entered.
function readline_f() {
  local dir
  # Everything is quoted so paths containing spaces survive; cd's own
  # output (printed when CDPATH is in effect) is discarded.
  dir=$(cd -- "$(dirname -- "$1")" >/dev/null && pwd) || return
  # ${dir%/} avoids a doubled slash for files directly under /
  printf '%s/%s\n' "${dir%/}" "$(basename -- "$1")"
}
# id3v1 detection
# Sets the global `id3v1` to true when the last 128 bytes of file "$1"
# begin with the marker "TAG", and to false otherwise (including when the
# file cannot be read - tail's errors are discarded).
function is_id3v1() {
  local marker
  marker=$(tail -c 128 "$1" 2>/dev/null | head -c 3)
  case "$marker" in
    TAG) id3v1=true ;;
    *) id3v1=false ;;
  esac
}
# id3v1-extended detection (how frequent are these? is this actually needed?)
# Sets the global `id3v1_extended` to true when the last 227 bytes of file
# "$1" begin with the marker "TAG+", and to false otherwise.
# NOTE(review): descriptions of the extended tag usually place the 227-byte
# "TAG+" block in front of the 128-byte "TAG" block, i.e. 355 bytes from the
# end of the file - confirm the offset against real samples.
function is_id3v1_extended() {
  id3v1_extended=false
  if [[ "$(tail -c 227 "$1" 2>/dev/null | head -c 4)" == "TAG+" ]]; then
    id3v1_extended=true
  fi
}
# Filter: copies stdin to stdout with the run of spaces at the end of each
# line removed (only the space character is trimmed, not tabs).
function rtrim() {
  sed -e 's/ *$//'
}
# Escape text for use in XML element content and attribute values.
#   $1 - the raw text
# Prints the text with & < > " ' replaced by &amp; &lt; &gt; &quot; &#39;
# (the ampersand first, so the entities themselves are not re-escaped).
#
# This is done with a single sed call instead of pasting the text into a
# Python/Ruby one-liner: the old approach let the text be interpreted as
# program source, relied on the Python 2 `print` statement, had to throw
# quotes away to keep the one-liner valid, and was the main source of
# slowness in the script.
function escape() {
  printf '%s\n' "$1" | sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\&#39;/g"
}
# Print the file name of path "$1" without its directory and without its
# last extension (everything from the final "." on is removed; a name
# without a dot is printed unchanged).
function file_with_stripped_extension() {
  # local: do not leak `filename` into the caller's scope
  local filename
  # quoted, so names containing spaces are not split into several arguments
  filename=$(basename -- "$1")
  printf '%s\n' "${filename%.*}"
}
# Print the track title for file "$1".
# Reads the globals `id3v1` / `id3v1_extended` (set by is_id3v1 and
# is_id3v1_extended):
#   id3v1          - the 30 bytes starting 125 bytes from the end of the
#                    file (right after the 3-byte "TAG" marker)
#   id3v1_extended - the 60 bytes starting 223 bytes from the end of the file
# When neither flag is set, or the field is empty, the file name without
# its extension is used instead.
function get_title() {
  local result=''
  if [[ "${id3v1:-false}" == true ]]; then
    # tr drops NUL padding explicitly (command substitution cannot hold NULs)
    result=$(tail -c 125 "$1" | head -c 30 | tr -d '\0' | rtrim)
  elif [[ "${id3v1_extended:-false}" == true ]]; then
    result=$(tail -c 223 "$1" | head -c 60 | tr -d '\0' | rtrim)
  fi
  if [[ -z "$result" ]]; then
    result=$(file_with_stripped_extension "$1") # use the filename
  fi
  # Quoted printf: an unquoted `echo -e $result` would glob-expand titles
  # such as "*", collapse repeated spaces and interpret backslashes.
  printf '%s\n' "$result"
}
# Print the artist for file "$1".
# Reads the globals `id3v1` / `id3v1_extended` (set by is_id3v1 and
# is_id3v1_extended):
#   id3v1          - the 30 bytes starting 95 bytes from the end of the file
#   id3v1_extended - the 60 bytes starting 163 bytes from the end of the file
# When neither flag is set, or the field is empty, the global `artist` is
# used; if that is empty too, the current directory name is used.
function get_artist() {
  local result=''
  if [[ "${id3v1:-false}" == true ]]; then
    # tr drops NUL padding explicitly (command substitution cannot hold NULs)
    result=$(tail -c 95 "$1" | head -c 30 | tr -d '\0' | rtrim)
  elif [[ "${id3v1_extended:-false}" == true ]]; then
    result=$(tail -c 163 "$1" | head -c 60 | tr -d '\0' | rtrim)
  fi
  # if either of the above failed
  if [[ -z "$result" ]]; then
    if [[ -z "$artist" ]]; then
      # the global $artist is empty, use the current directory name
      result=$(get_dir)
    else
      # otherwise use the globally set artist
      result="$artist"
    fi
  fi
  # Quoted printf: an unquoted `echo -e $result` would glob-expand names
  # such as "*", collapse repeated spaces and interpret backslashes.
  printf '%s\n' "$result"
}
# Append one <item> element to the global `result`.
#   $1 - path of the media file
#   $2 - artist, written to <itunes:author>
#   $3 - image file name for <itunes:image>; the element is omitted if empty
#   $4 - "root" when the file lives in the top-level directory; forwarded to
#        local_url for BOTH the image and the enclosure so that they get the
#        same directory prefix
# Reads the globals `use_id3v1` and `date_to_use`. Writes nothing to stdout.
# The literal "\n" and "\t" sequences stored in `result` are expanded later,
# when the feed is written with `echo -e`.
function make_entry() {
  local title size mime
  # if id3v1 tags are wanted, probe the file for them first; track #s are
  # ignored due to them not being particularly reliable
  if [[ "${use_id3v1:-false}" == true ]]; then
    is_id3v1 "$1"
    is_id3v1_extended "$1"
  fi
  title=$(get_title "$1")
  # length is the size in bytes; wc is used rather than parsing `ls -l`
  # columns, and the padding some wc implementations add is removed
  size=$(wc -c < "$1")
  size=${size//[[:space:]]/}
  mime=$(file -b --mime-type "$1")
  result+="\n<item>"
  result+="\n\t<title>$(escape "$title")</title>"
  result+="\n\t<itunes:author>$(escape "$2")</itunes:author>"
  # image, if available
  if [[ -n "$3" ]]; then
    result+="\n\t<itunes:image href=\"$(escape "$(local_url "$3" "$4")")\"/>"
  fi
  # no date info, just use the current date
  result+="\n\t<pubDate>$date_to_use</pubDate>"
  result+="\n\t<enclosure url=\"$(escape "$(local_url "$1" "$4")")\" length=\"$size\" type=\"$mime\"/>"
  result+="\n</item>"
}
# Append an <item> for every audio file in the CURRENT working directory.
#   $1 - directory name or path (shown in the progress message and handed to
#        get_artist)
#   $2 - "root" for the top-level directory, empty otherwise; forwarded to
#        make_entry
# Sets the global `image` to the first *.png / *.jpg found here, or to the
# empty string when there is none.
function process_directory() {
  local file dir_artist
  printf 'PROCESSING DIRECTORY: %s\n' "$1"
  # Start from a clean slate so an image found in a previously processed
  # directory is not attributed to this one.
  image=''
  # get image if one exists, just use the first one we find,
  # even if there are multiple images
  for file in *.png *.jpg; do
    if [[ -f "$file" ]]; then
      image="$file"
      break
    fi
  done
  # use same artist for the directory. The value is kept under a name other
  # than `artist` so that get_artist still sees the global artist.
  dir_artist=$(get_artist "$1")
  # audio files, in glob order
  # optionally can add the following: *.mov, *.m4v, *.pdf, *.epub
  for file in *.mp3 *.m4a *.mp4; do
    # -f also filters out patterns that matched nothing
    if [[ -f "$file" ]]; then
      make_entry "$file" "$dir_artist" "$image" "$2"
    fi
  done
}
# Append the XML prolog, the opening <rss> and <channel> tags and the
# channel-level metadata to the global `result`.
# The escaped current directory name (stored in the global `directory_name`)
# serves as title, summary and description; the global `artist` is the
# author; the global `image` is added as <itunes:image> when non-empty.
# Lines are separated by a literal "\n" sequence, not a real newline.
function write_header() {
  directory_name=$(escape "$(get_dir)")
  local author line
  author=$(escape "$artist")
  # every line after the prolog, in output order
  local -a rest=(
    '<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">'
    '<channel>'
    "<title>${directory_name}</title>"
    "<itunes:author>${author}</itunes:author>"
    "<itunes:summary>${directory_name}</itunes:summary>"
    "<description>${directory_name}</description>"
  )
  [[ -z "$image" ]] || rest+=("<itunes:image href=\"${image}\" />")
  result+='<?xml version="1.0" encoding="UTF-8"?>'
  for line in "${rest[@]}"; do
    result+="\n${line}"
  done
}
# Append the closing </channel> and </rss> tags to the global `result`,
# each preceded by a literal "\n" sequence.
function write_footer() {
  result+='\n</channel>\n</rss>'
}
# Build the whole feed and write it to "$FILENAME" in the current directory.
# Sets the global `artist` to the current directory name, then accumulates
# header, the top-level directory's items, each immediate sub-directory's
# items and the footer in the global `result`.
# Returns non-zero when the start directory cannot be re-entered or the
# file cannot be written.
function write_file() {
  local dir
  local root="$PWD"
  # set global artist
  artist=$(get_dir)
  write_header
  # top-level-directory
  process_directory "$(pwd)" 'root'
  # process sub-directories
  for dir in *; do
    [[ -d "$dir" ]] || continue
    # Skip directories that cannot be entered; a blind `cd ..` after a
    # failed cd would climb out of the feed root. The "./" prefix protects
    # against CDPATH lookups and names starting with "-".
    cd "./$dir" || continue
    process_directory "$dir"
    cd "$root" || return 1
  done
  write_footer
  # echo -e expands the literal "\n" / "\t" sequences stored in $result;
  # the target is quoted so file names with spaces work
  echo -e "$result" > "$FILENAME"
}
# OUTPUT:
echo
echo "GENERATING PODCAST FEED"
echo "======================="
echo "Writing to: $FILENAME"
# write the actual file; only claim success (and exit 0) if it worked
if ! write_file; then
  echo "ERROR: could not write $FILENAME" >&2
  exit 1
fi
echo "Finished writing to file: $FILENAME"
# don't run any of the crap below...
exit 0
# test feed in iTunes:
# NOTE: as the script stands this section never runs - the unconditional
# `exit 0` right above it ends the script first.
# It is only attempted when ${PYTHON} is non-empty, because the feed is
# served with Python's built-in HTTP server module.
if [ ! -z "${PYTHON}" ]; then
echo
echo "Attempting to open iTunes to subscribe..."
# run applescript which opens iTunes and the podcast subscribe window
# and puts "$URL$FILENAME" into the dialog's first text field.
# NOTE(review): the whole osascript call is wrapped in backticks, so
# whatever osascript prints on stdout is then executed as a shell command -
# probably unintended.
# NOTE(review): inside the single-quoted -e arguments the trailing
# backslash and the newline are passed to osascript literally (there is no
# line continuation inside single quotes) - confirm AppleScript accepts it.
`osascript \
-e 'tell application "iTunes" to activate' \
-e 'tell application "System Events" to click menu item "Subscribe to \
Podcast…" of menu "Advanced" of menu bar 1 of process "iTunes"' \
-e 'tell application "System Events" to tell process "iTunes" to tell \
window "Subscribe to Podcast" to set value of text field 1 to \
"'"$URL$FILENAME"'"'`
echo
echo "Starting server..."
echo "(because the feed cannot be served to iTunes as a file-system path,"
echo "itunes requires a URL, use $URL$FILENAME)"
echo
echo "Press Ctrl-C to stop the server after retrieving episodes in iTunes..."
# run simple server from directory
# NOTE(review): SimpleHTTPServer is the Python 2 module name (Python 3
# calls it http.server) - confirm which interpreter ${PYTHON} points to.
# The backticks here likewise execute the server's stdout as a command.
`"${PYTHON}" -m SimpleHTTPServer`
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment