davemo/posterous-convert.sh

## posterous-convert.sh
#!/bin/bash

##### DESCRIPTION

# Convert all HTML files in the posterous backup directories into Markdown.

# This script works with the official backup that you can download from Posterous.

# Modified by @davemo using code from @rdegges https://github.com/rdegges/posterous-to-markdown
# which was written for a 3rd party backup tool.

##### REQUIREMENTS

# PANDOC

# If you're on Ubuntu, you can install Pandoc by running sudo apt-get -y install pandoc.

# If you're on OS X or Windows, you can install Pandoc by using one of their provided installers.
# http://code.google.com/p/pandoc/downloads/list

##### USAGE

# 1. Sign into Posterous and back up your content: http://posterous.com/#backup
# 2. Extract the .zip file Posterous gives you
# 3. In terminal, cd into the `posts` directory inside the backup folder
# 4. curl -L https://gist.github.com/davemo/5473387/raw | bash

##### GLOBALS

# Print every *.html file below the current directory, one path per line,
# relative to the current directory (no leading "./"), in a stable order.
# find is used instead of parsing `ls -R` output so that only regular files
# whose name ends in .html are listed (not directories, and not paths that
# merely contain the text "html").
find_html_posts() {
    find . -type f -name '*.html' | sed 's|^\./||' | LC_ALL=C sort
}

# Path of the pandoc executable; empty when pandoc cannot be found.
PANDOC=$(command -v pandoc)
# Directory (relative to the current one) that receives the Markdown files.
OUTDIR='markdown'
# Newline-separated list of the HTML posts to convert.
FILES=$(find_html_posts)

##### ENSURE PANDOC

# Abort early when pandoc was not located (PANDOC is empty).
if [ -z "$PANDOC" ]; then
    # The diagnostic goes to stderr and the status is non-zero, so a caller
    # can tell that nothing was converted.
    echo "Pandoc not found! Please install Pandoc and try again." >&2
    exit 1
fi

##### ENSURE MARKDOWN DIRECTORY

# Create the output directory when it is missing. The location is taken from
# OUTDIR (falling back to "markdown" if it is unset); -p makes this a no-op
# when the directory already exists.
if ! mkdir -p -- "${OUTDIR:-markdown}"; then
    # Without the directory every conversion would fail, so stop here.
    echo "Could not create output directory '${OUTDIR:-markdown}'." >&2
    exit 1
fi

##### HELPERS

#######################################
# Print the zero-padded day of the month on which a post was published.
# The day is read from the first line containing "post_time", e.g.
#   <span class='post_time'>August 27 2008,  4:30 PM</span>
# where it is the third field when the line is split on single spaces.
# Arguments: $1 - path to the HTML file of a post
# Outputs:   the two-digit day (e.g. "27" or "05") on stdout; nothing when
#            no line contains "post_time"
#######################################
function get_posterous_publication_day {
    # -F'[ ]' splits on every single space (default awk splitting would
    # collapse runs of blanks). The day is taken straight from field 3 rather
    # than through substr() with a start index of 0, whose result differs
    # between awk implementations. Only the first match is used so that the
    # result is always a single line.
    awk -F'[ ]' '/post_time/ { printf "%02d\n", $3; exit }' < "$1"
}

##### PROCESS THE FILES

# Pick the option that makes pandoc write ATX ("# Title") headings: releases
# that know --markdown-headings have deprecated the older --atx-headers.
if pandoc --help 2>/dev/null | grep -q -- '--markdown-headings'; then
    heading_opt='--markdown-headings=atx'
else
    heading_opt='--atx-headers'
fi

failed=0
# FILES holds one path per line. Reading it line by line (on fd 3, so stdin is
# left alone) keeps paths that contain spaces in one piece.
while IFS= read -r file <&3; do
    [ -n "$file" ] || continue    # an empty FILES still yields one blank line
    base=${file%.*}               # just the path and filename, without a trailing .html
    filename=${base##*/}          # post name without its directories
    # publication date info: the day comes from the post itself, year and
    # month are the first two directory components (year/month/post.html)
    day=$(get_posterous_publication_day "$file")
    dir=${file%/*}
    year=${dir%%/*}
    month=${dir#*/}
    month=${month%%/*}
    # Only report success when pandoc actually succeeded.
    if pandoc -s "$heading_opt" --columns=80 -r html "${base}.html" -o "${OUTDIR}/${year}-${month}-${day}-${filename}.md"; then
        echo "Converting file $file to Markdown... done"
    else
        echo "Converting file $file to Markdown... FAILED" >&2
        failed=$((failed + 1))
    fi
done 3<<< "$FILES"

if [ "$failed" -gt 0 ]; then
    echo "$failed post(s) could not be converted." >&2
    exit 1
fi

echo "All HTML posts have been converted to Markdown and stored in the 'markdown' folder!"
	#!/bin/bash

	##### DESCRIPTION

	# Convert all HTML files in the posterous backup directories into Markdown.

	# This script works with the official backup that you can download from Posterous.

	# Modified by @davemo using code from @rdegges https://github.com/rdegges/posterous-to-markdown
	# which was written for a 3rd party backup tool.

	##### REQUIREMENTS

	# PANDOC

	# If you're on Ubuntu, you can install Pandoc by running sudo apt-get -y install pandoc.

	# If you're on OS X or Windows, you can install Pandoc by using one of their provided installers.
	# http://code.google.com/p/pandoc/downloads/list

	##### USAGE

	# 1. Sign into Posterous and back up your content: http://posterous.com/#backup
	# 2. Extract the .zip file Posterous gives you
	# 3. In terminal, cd into the `posts` directory inside the backup folder
	# 4. curl -L https://gist.github.com/davemo/5473387/raw | bash

	##### GLOBALS

	# Print every *.html file below the current directory, one path per line,
	# relative to the current directory (no leading "./"), in a stable order.
	# This replaces an `ls -R` pipeline whose pipes were escaped ("\|") and
	# therefore never formed a pipeline; find also restricts the result to
	# regular files whose name ends in .html.
	find_html_posts() {
		find . -type f -name '*.html' | sed 's|^\./||' | LC_ALL=C sort
	}

	# Path of the pandoc executable; empty when pandoc cannot be found.
	PANDOC=$(command -v pandoc)
	# Directory (relative to the current one) that receives the Markdown files.
	OUTDIR='markdown'
	# Newline-separated list of the HTML posts to convert.
	FILES=$(find_html_posts)

	##### ENSURE PANDOC

	# Abort early when pandoc was not located (PANDOC is empty).
	if [ -z "$PANDOC" ]; then
		# The diagnostic goes to stderr and the status is non-zero, so a
		# caller can tell that nothing was converted.
		echo "Pandoc not found! Please install Pandoc and try again." >&2
		exit 1
	fi

	##### ENSURE MARKDOWN DIRECTORY

	# Create the output directory when it is missing. The location is taken
	# from OUTDIR (falling back to "markdown" if it is unset); -p makes this a
	# no-op when the directory already exists.
	if ! mkdir -p -- "${OUTDIR:-markdown}"; then
		# Without the directory every conversion would fail, so stop here.
		echo "Could not create output directory '${OUTDIR:-markdown}'." >&2
		exit 1
	fi

	##### HELPERS

	#######################################
	# Print the zero-padded day of the month on which a post was published.
	# The day is read from the first line containing "post_time", e.g.
	#   <span class='post_time'>August 27 2008, 4:30 PM</span>
	# where it is the third field when the line is split on single spaces.
	# Arguments: $1 - path to the HTML file of a post
	# Outputs:   the two-digit day (e.g. "27" or "05") on stdout; nothing
	#            when no line contains "post_time"
	#######################################
	function get_posterous_publication_day {
		# A single awk call replaces the former chain of commands, whose
		# pipes were escaped ("\|") and so were passed as literal arguments.
		# -F'[ ]' splits on every single space; the day is taken straight
		# from field 3 instead of through substr() with a start index of 0,
		# whose result differs between awk implementations. Only the first
		# match is used so that the result is always a single line.
		awk -F'[ ]' '/post_time/ { printf "%02d\n", $3; exit }' < "$1"
	}

	##### PROCESS THE FILES

	# Pick the option that makes pandoc write ATX ("# Title") headings:
	# releases that know --markdown-headings have deprecated --atx-headers.
	if pandoc --help 2>/dev/null | grep -q -- '--markdown-headings'; then
		heading_opt='--markdown-headings=atx'
	else
		heading_opt='--atx-headers'
	fi

	failed=0
	# FILES holds one path per line. Reading it line by line (on fd 3, so
	# stdin is left alone) keeps paths that contain spaces in one piece.
	while IFS= read -r file <&3; do
		[ -n "$file" ] || continue    # an empty FILES still yields one blank line
		base=${file%.*}               # just the path and filename, without a trailing .html
		filename=${base##*/}          # post name without its directories
		# publication date info: the day comes from the post itself, year and
		# month are the first two directory components (year/month/post.html).
		# Parameter expansion is used here; the former `echo ... \| cut`
		# calls never ran cut because their pipes were escaped.
		day=$(get_posterous_publication_day "$file")
		dir=${file%/*}
		year=${dir%%/*}
		month=${dir#*/}
		month=${month%%/*}
		# Only report success when pandoc actually succeeded.
		if pandoc -s "$heading_opt" --columns=80 -r html "${base}.html" -o "${OUTDIR}/${year}-${month}-${day}-${filename}.md"; then
			echo "Converting file $file to Markdown... done"
		else
			echo "Converting file $file to Markdown... FAILED" >&2
			failed=$((failed + 1))
		fi
	done 3<<< "$FILES"

	if [ "$failed" -gt 0 ]; then
		echo "$failed post(s) could not be converted." >&2
		exit 1
	fi

	echo "All HTML posts have been converted to Markdown and stored in the 'markdown' folder!"