Skip to content

Instantly share code, notes, and snippets.

@The-Judge
Forked from Lixivial/confluence_export.sh
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save The-Judge/c32f0f14f0fbc97068aa to your computer and use it in GitHub Desktop.
Save The-Judge/c32f0f14f0fbc97068aa to your computer and use it in GitHub Desktop.
#!/bin/bash
# set -x
# Jesse Pearson
# Shell script to export all Confluence Spaces to PDF and HTML and optionally
# push them to an SMB share or some other share.
#
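# Dependencies:
# wget
# mysql (command-line client, used to query the Confluence database)
# unzip (used to expand the HTML export archives)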
#
# Optional dependencies:
# smbclient for pushing to a samba share.
#
# Changelog:
# 02.21.10 - jp - Fixed issues, made it use POST.
#
# 11.20.07 - jp - Added HTML export to grab attachments, too.
#
# 11.19.07 - jp - Changed SQL strings to be confUser and confPass.
# - Push output of PDFs to \\nat01\IS-Temps\Confluence PDF Exports\
# - Fixed logging
# - Made "Space Name" the title of the PDF, rather than a randomly generated name.
#
# 11.08.07 - jp - Initial creation date.
#
# Vars:
# spaceName - Array containing the space names.
# spaceID - Array containing the space IDs.
# spaceKey - Array containing the space Keys to be used in the download URL.
# contentID - Array containing each space's content pages.
# URLP - URL containing all the getContentBy strings used to grab PDF.
# URLH - URL containing all the getContentBy strings used to grab HTML.
# sRun - Base directory the script works in; PDFs are written here, with zips/ and logs/ underneath.
# t* - Temporary holding variables
# confUser - Confluence db user
# confPass - Confluence db pass
#
# Setup Path
sRun="/home/jpearson/confluence_export"
# Query strings.
# Every query is ordered by SPACEID so that the Nth name, the Nth ID and the
# Nth key all describe the same space. The loops further down pair the three
# result sets purely by position, and without ORDER BY the server is free to
# return each result set in a different order.
tSN="SELECT SPACENAME FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
tSID="SELECT SPACEID FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
tSK="SELECT SPACEKEY FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
# Timestamp used in the log file name. %H (24-hour clock) is used instead of
# %I (12-hour clock, no AM/PM marker) so that a morning run and an afternoon
# run can never end up with the same log file name.
tDate=$(date "+%m.%d.%Y_%H:%M:%S")
# Setup db access vars
confUser="{conf_db_user}"
confPass="{conf_db_password}"
# Declare and instantiate the space* arrays.
# Each array deliberately holds its whole result set as ONE string in
# element 0, because the rest of the script reads ${spaceKey[0]} and
# ${spaceName[0]} and word-splits ${spaceID[@]}:
#   spaceName[0] - names joined with '|' (names may contain spaces), with a
#                  trailing '|'
#   spaceID[0]   - IDs, one per line
#   spaceKey[0]  - keys, one per line
# NOTE(review): -p"$confPass" exposes the password in the process list; a
# client option file (mysql --defaults-extra-file=...) would avoid that --
# confirm before switching.
declare -a spaceName=("$(mysql -u "$confUser" -p"$confPass" confdb -Bs <<<"$tSN" | tr '\n' '|')")
declare -a spaceID=("$(mysql -u "$confUser" -p"$confPass" confdb -Bs <<<"$tSID")")
declare -a spaceKey=("$(mysql -u "$confUser" -p"$confPass" confdb -Bs <<<"$tSK")")
#Array index to get SPACEKEY reference later.
c=1
# Log in to Confluence once and store the session cookie in ./cookies.txt,
# which the export requests later in the script load again.
# The command is run directly rather than inside backticks: backticks capture
# the command's standard output and then try to execute it as a command.
# -O /dev/null discards the returned login page, which would otherwise pile up
# as login.action, login.action.1, ... in the current directory.
# If the login request itself fails the script stops here, because every
# export request after it depends on the session cookie.
if ! wget --keep-session-cookies --save-cookies cookies.txt \
  --post-data 'os_username={confluence_username}&os_password={confluence_password}' \
  -O /dev/null \
  "http://{server_name}:8080/confluence/login.action"
then
  echo "Confluence login request failed; aborting export." >&2
  exit 1
fi
# Iterate through each Space
#
# spaceID, spaceKey and spaceName are matched up by position: $c is the
# 1-based position of the space currently being exported.

# buildPostData TYPE OPTIONS [CONTENT_ID...]
# Prints the POST body for one doexportspace.action request:
# "type=TYPE", then one "&contentToBeExported=ID" per CONTENT_ID in the order
# given, then "&OPTIONS".
buildPostData() {
  local body="type=$1"
  local options=$2
  local id
  shift 2
  for id in "$@"; do
    body+="&contentToBeExported=${id}"
  done
  printf '%s&%s\n' "$body" "$options"
}

# All progress and wget diagnostics of this run are appended to this file.
logFile="$sRun/logs/${tDate}_output.log"

# ${spaceID[@]} is unquoted on purpose: element 0 holds every ID and word
# splitting yields one ID per iteration.
for sID in ${spaceID[@]}
do
  # Query string to get contentIDs
  tCP="SELECT CONTENTID FROM confdb.CONTENT WHERE SPACEID=${sID}"
  # Pick the c-th key. The echo argument is intentionally unquoted: word
  # splitting turns the whitespace-separated key list into one space-separated
  # line that cut can index.
  # shellcheck disable=SC2086
  tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
  # Pick the c-th name from the '|'-separated list. The list is passed quoted
  # so that a name is neither glob-expanded nor has its inner whitespace
  # collapsed.
  tSN=$(cut -f"$c" -d "|" <<<"${spaceName[0]}")
  # A '/' in a space name would be taken as a directory separator in the
  # output path, so it is replaced with '_' for the PDF file name.
  pdfFile="$sRun/${tSN//\//_}.pdf"
  # Get all the subcontent relating to this space.
  contentID=$(mysql -u "$confUser" -p"$confPass" confdb -Bs <<<"$tCP")
  # Begin building the URLs
  URLP="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"
  URLH="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"
  # Build the POST bodies: export type, every content ID of the space, then
  # the export options (the HTML export additionally asks for attachments).
  # $contentID is unquoted on purpose so that each ID becomes its own argument.
  # shellcheck disable=SC2086
  POSTDATAP=$(buildPostData TYPE_PDF "includeComments=true&confirm=Export" $contentID)
  # shellcheck disable=SC2086
  POSTDATAH=$(buildPostData TYPE_HTML "includeComments=true&backupAttachments=true&confirm=Export" $contentID)
  # Log output for PDF URLs, and its wget output.
  {
    echo "PDF URL for $tURL: $URLP"
    echo "PDF POST data for $tURL: $POSTDATAP"
    echo ""
    echo "wget for PDF: "
  } >> "$logFile"
  # NOTE(review): -p (page requisites) combined with -O makes wget write every
  # document it fetches into the same output file; confirm -p is still wanted.
  wget --load-cookies cookies.txt --post-data "$POSTDATAP" -t 5 -O "$pdfFile" -p "$URLP" 2>> "$logFile" \
    || echo "wget for PDF failed for $tURL" >> "$logFile"
  echo "" >> "$logFile"
  # Log output for HTML URLs, and the wget output.
  {
    echo "HTML URL for $tURL: $URLH"
    echo "HTML POST data for $tURL: $POSTDATAH"
    echo ""
    echo "wget for HTML: "
  } >> "$logFile"
  wget --load-cookies cookies.txt --post-data "$POSTDATAH" -t 5 -O "$sRun/zips/$tURL.zip" -p "$URLH" 2>> "$logFile" \
    || echo "wget for HTML failed for $tURL" >> "$logFile"
  echo "" >> "$logFile"
  c=$((c+1))
done
# Reset counter
c=1
# Extraction output and errors are appended to the run's log file.
logFile="$sRun/logs/${tDate}_output.log"
# Unpack the HTML export of every space into a directory named after the space.
# $sID is not used in the body; the loop only has to run once per space so that
# $c walks through the same positions as in the key and name lists.
for sID in ${spaceID[@]}
do
  # The echo argument is intentionally unquoted: word splitting turns the
  # whitespace-separated key list into one space-separated line for cut.
  # shellcheck disable=SC2086
  tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
  # The name list is passed quoted so that a name is neither glob-expanded nor
  # has its inner whitespace collapsed.
  tSN=$(cut -f"$c" -d "|" <<<"${spaceName[0]}")
  echo "$tURL"
  zipFile="$sRun/zips/$tURL.zip"
  # Expand zips to long directory names.
  # unzip is run directly instead of inside backticks: backticks capture its
  # file listing and then try to execute that listing as a command.
  # -o overwrites existing files without asking, so an unattended run never
  # stops at a prompt. A '/' in the space name is replaced with '_' so the
  # name stays a single directory level.
  # The archive is removed only after a successful extraction, so a failed
  # unzip does not destroy the only copy of the export.
  if unzip -o -d "$sRun/zips/${tSN//\//_}/" "$zipFile" >> "$logFile" 2>&1
  then
    rm "$zipFile"
  else
    echo "unzip failed for $tURL; keeping $zipFile" >> "$logFile"
  fi
  c=$((c+1))
done
# Push files to samba share.
# The PDFs in $sRun are uploaded first, then everything under $sRun/zips is
# uploaded recursively into the remote "HTML" directory.
# NOTE(review): the password is given as a command-line argument and is
# therefore visible in the process list; smbclient's -A <authfile> option
# would avoid that -- confirm before switching.
if ! smbclient \\\\{server}\\{path} -U {domain_username} -W {domain} {domain_password} -c "cd \"Confluence PDF Exports\"; prompt off; lcd \"$sRun\"; mput *.pdf; cd \"HTML\"; lcd \"zips\"; recurse; mput *" 2>> "$sRun"/logs/"$tDate"_output.log
then
  # Report the failure on stderr and in the log. The local files are still
  # cleaned up below, exactly as before.
  echo "smbclient push failed; see $sRun/logs/${tDate}_output.log" | tee -a "$sRun/logs/${tDate}_output.log" >&2
fi
# Clean up files
# ${sRun:?} aborts the script if sRun is empty or unset, so these commands can
# never expand to "rm -rf /*.pdf" or "rm -rf /zips/*".
rm -rf -- "${sRun:?}"/*.pdf
rm -rf -- "${sRun:?}"/zips/*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment