Lixivial/confluence_export.sh

## confluence_export.sh
#!/bin/bash
# set -x
# Jesse Pearson
# Shell script to output all Confluence Spaces to PDF and optionally
# push them to a SMB or some other share.
#
# Dependencies:
# wget
#
# Optional dependencies:
# smbclient for pushing to a samba share.
#
# Changelog:
# 02.21.10  - jp - Fixed issues, made it use POST.
#
# 11.20.07  - jp - Added HTML export to grab attachments, too.
#
# 11.19.07  - jp - Changed SQL strings to be confUser and confPass.
#                - Push output of PDFs to \\nat01\IS-Temps\Confluence PDF Exports\
#                - Fixed logging
#                - Made "Space Name" the title of the PDF, rather than a randomly generated name.
#
# 11.08.07  - jp - Initial creation date.
#
# Vars:
# spaceName - Array containing the space names.
# spaceID   - Array containing the space IDs.
# spaceKey  - Array containing the space Keys to be used in the download URL.
# contentID - Array containing each space's content pages.
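# URLP      - Export action URL (with the space key) used to request the PDF; the content IDs travel in the POST data.
# URLH      - Export action URL (with the space key) used to request the HTML zip; the content IDs travel in the POST data.
# sRun      - Path of the working directory the script runs from (exports, zips/ and logs/ live under it).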
# t*        - Temporary holding variables
# confUser  - Confluence db user
# confPass  - Confluence db pass
#
# Setup Path
# Base working directory: PDFs are written directly into it, HTML export
# zips go under zips/ and the per-run log under logs/.
sRun="/home/jpearson/confluence_export"

# Query strings
# Each query returns one column for the global spaces. The loops further down
# pair the three result lists up by position, so every query gets the same
# explicit ORDER BY; without it the row order is unspecified and names, IDs
# and keys could be matched to the wrong space.
tSN="SELECT SPACENAME FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
tSID="SELECT SPACEID FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
tSK="SELECT SPACEKEY FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
# Timestamp used in this run's log file name.
tDate=$(date "+%m.%d.%Y_%I:%M:%S")

# Setup db access vars
confUser="{conf_db_user}"
confPass="{conf_db_password}"

# Declare and instantiate the space* arrays
# Only element 0 of each array is filled: it holds the complete result list as
# a single string (names joined with '|', IDs and keys separated by newlines).
# NOTE(review): -p"$confPass" puts the password on the command line where
# other local users may see it; consider a client option file instead.
declare -a spaceName spaceID spaceKey
spaceName[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSN" | tr '\n' '|')
spaceID[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSID")
spaceKey[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSK")

#Array index to get SPACEKEY reference later.
c=1

# Log in once and keep the session cookie in cookies.txt (current directory)
# for the export requests. This is run as a plain command: wrapped in
# backticks, whatever wget printed would be executed as a command.
wget --keep-session-cookies --save-cookies cookies.txt \
   --post-data 'os_username={confluence_username}&os_password={confluence_password}' \
   "http://{server_name}:8080/confluence/login.action"

# Iterate through each Space
# For every space ID: look up its key and name by position, collect the
# space's content IDs, then POST one PDF export and one HTML export request
# and save the responses under $sRun. All progress goes to one log file.
exportLog="$sRun/logs/${tDate}_output.log"

# Make sure the output locations exist before anything is redirected there.
mkdir -p -- "$sRun/logs" "$sRun/zips"

# spaceID[0] holds the whole newline-separated ID list, so this expansion is
# deliberately unquoted to split it into one ID per iteration.
for sID in ${spaceID[@]}
do
   # Query string to get contentIDs
   tCP="SELECT CONTENTID FROM confdb.CONTENT WHERE SPACEID=${sID}"

   # c is the 1-based position of this space in the key and name lists.
   # The key list is newline-separated, so it is echoed unquoted to join the
   # keys with single spaces for cut.
   tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
   # The name list is '|'-separated; quoted so spaces and glob characters
   # inside a space name survive unchanged.
   tSN=$(printf '%s\n' "${spaceName[0]}" | cut -f"$c" -d "|")

   # Get all the subcontent relating to this space.
   contentID=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tCP")

   # Begin building the URLs (both exports use the same action URL; the
   # export type is selected in the POST data).
   URLP="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"
   URLH="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"

   POSTDATAP="type=TYPE_PDF"
   POSTDATAH="type=TYPE_HTML"

   # Append one contentToBeExported parameter per content ID.
   # Unquoted on purpose: one ID per line of the query result.
   for tID in $contentID
   do
       POSTDATAP="$POSTDATAP&contentToBeExported=${tID}"
       POSTDATAH="$POSTDATAH&contentToBeExported=${tID}"
   done

   # Add the export options and the form confirmation field.
   POSTDATAP="$POSTDATAP&includeComments=true&confirm=Export"
   POSTDATAH="$POSTDATAH&includeComments=true&backupAttachments=true&confirm=Export"

   # Log output for PDF URLs, and its wget output.
   {
      echo "PDF URL for $tURL: $URLP"
      echo "PDF POST data for $tURL: $POSTDATAP"
      echo ""
      echo "wget for PDF: "
   } >> "$exportLog"
   wget --load-cookies cookies.txt --post-data "$POSTDATAP" -t 5 \
      -O "$sRun/$tSN.pdf" -p "$URLP" 2>> "$exportLog"
   echo "" >> "$exportLog"

   # Log output for HTML URLs, and the wget output.
   {
      echo "HTML URL for $tURL: $URLH"
      echo "HTML POST data for $tURL: $POSTDATAH"
      echo ""
      echo "wget for HTML: "
   } >> "$exportLog"
   wget --load-cookies cookies.txt --post-data "$POSTDATAH" -t 5 \
      -O "$sRun/zips/$tURL.zip" -p "$URLH" 2>> "$exportLog"
   echo "" >> "$exportLog"

   c=$((c+1))
done

# Reset counter
c=1

# Second pass over the spaces: expand each downloaded HTML zip into a
# directory named after the space, then delete the zip.
unzipLog="$sRun/logs/${tDate}_output.log"

# sID itself is not used here; the loop only needs one iteration per space so
# that c walks the key and name lists in step. Unquoted on purpose, because
# spaceID[0] holds the whole newline-separated ID list.
for sID in ${spaceID[@]}
do
   # Key list is newline-separated; echoing it unquoted joins the keys with
   # single spaces for cut.
   tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
   # Name list is '|'-separated; quoted so the name is taken verbatim.
   tSN=$(printf '%s\n' "${spaceName[0]}" | cut -f"$c" -d "|")
   echo "$tURL"

   # Expand zips to long directory names.
   # Run unzip directly: wrapped in backticks, its file listing would be
   # executed as a command and its own errors would never reach the log.
   unzip -d "$sRun/zips/$tSN/" "$sRun/zips/$tURL.zip" 2>> "$unzipLog"
   c=$((c+1))
   rm -- "$sRun/zips/$tURL.zip"

done

# Push files to samba share.
# The -c command list changes into "Confluence PDF Exports" on the share and
# uploads every PDF from $sRun, then changes into its "HTML" directory and
# uploads the contents of the local zips/ directory recursively.
# smbclient's error output is appended to this run's log.
# NOTE(review): the password is passed as a command-line argument.
smbclient \\\\{server}\\{path} -U {domain_username} -W {domain} {domain_password} \
   -c "cd \"Confluence PDF Exports\"; prompt off; lcd \"$sRun\"; mput *.pdf; cd \"HTML\"; lcd \"zips\"; recurse; mput *" \
   2>> "$sRun/logs/${tDate}_output.log"

# Clean up files
# ${sRun:?} aborts the script if sRun is unset or empty, so these globs can
# never expand to /*.pdf or /zips/*. The paths are quoted up to the glob so a
# working directory containing spaces is handled correctly.
rm -rf -- "${sRun:?}"/*.pdf
rm -rf -- "${sRun:?}"/zips/*
	#!/bin/bash
	# set -x
	# Jesse Pearson
	# Shell script to output all Confluence Spaces to PDF and optionally
	# push them to a SMB or some other share.
	#
	# Dependencies:
	# wget
	#
	# Optional dependencies:
	# smbclient for pushing to a samba share.
	#
	# Changelog:
	# 02.21.10 - jp - Fixed issues, made it use POST.
	#
	# 11.20.07 - jp - Added HTML export to grab attachments, too.
	#
	# 11.19.07 - jp - Changed SQL strings to be confUser and confPass.
	# - Push output of PDFs to \\nat01\IS-Temps\Confluence PDF Exports\
	# - Fixed logging
	# - Made "Space Name" the title of the PDF, rather than a randomly generated name.
	#
	# 11.08.07 - jp - Initial creation date.
	#
	# Vars:
	# spaceName - Array containing the space names.
	# spaceID - Array containing the space IDs.
	# spaceKey - Array containing the space Keys to be used in the download URL.
	# contentID - Array containing each space's content pages.
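	# URLP - Export action URL (with the space key) used to request the PDF; the content IDs travel in the POST data.
	# URLH - Export action URL (with the space key) used to request the HTML zip; the content IDs travel in the POST data.
	# sRun - Path of the working directory the script runs from (exports, zips/ and logs/ live under it).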
	# t* - Temporary holding variables
	# confUser - Confluence db user
	# confPass - Confluence db pass
	#
	# Setup Path
	# Base working directory: PDFs are written directly into it, HTML export
	# zips go under zips/ and the per-run log under logs/.
	sRun="/home/jpearson/confluence_export"

	# Query strings
	# Each query returns one column for the global spaces. The loops further
	# down pair the three result lists up by position, so every query gets the
	# same explicit ORDER BY; without it the row order is unspecified and
	# names, IDs and keys could be matched to the wrong space.
	tSN="SELECT SPACENAME FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
	tSID="SELECT SPACEID FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
	tSK="SELECT SPACEKEY FROM confdb.SPACES WHERE SPACETYPE='global' ORDER BY SPACEID"
	# Timestamp used in this run's log file name.
	tDate=$(date "+%m.%d.%Y_%I:%M:%S")

	# Setup db access vars
	confUser="{conf_db_user}"
	confPass="{conf_db_password}"

	# Declare and instantiate the space* arrays
	# Only element 0 of each array is filled: it holds the complete result list
	# as a single string (names joined with '|', IDs and keys separated by
	# newlines). The queries are passed to mysql directly; an escaped pipe
	# would only make echo print the pipeline text instead of running it.
	# NOTE(review): -p"$confPass" puts the password on the command line where
	# other local users may see it; consider a client option file instead.
	declare -a spaceName spaceID spaceKey
	spaceName[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSN" | tr '\n' '|')
	spaceID[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSID")
	spaceKey[0]=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tSK")

	#Array index to get SPACEKEY reference later.
	c=1

	# Log in once and keep the session cookie in cookies.txt (current
	# directory) for the export requests. This is run as a plain command:
	# wrapped in backticks, whatever wget printed would be executed.
	wget --keep-session-cookies --save-cookies cookies.txt \
	   --post-data 'os_username={confluence_username}&os_password={confluence_password}' \
	   "http://{server_name}:8080/confluence/login.action"

	# Iterate through each Space
	# For every space ID: look up its key and name by position, collect the
	# space's content IDs, then POST one PDF export and one HTML export request
	# and save the responses under $sRun. All progress goes to one log file.
	exportLog="$sRun/logs/${tDate}_output.log"

	# Make sure the output locations exist before anything is redirected there.
	mkdir -p -- "$sRun/logs" "$sRun/zips"

	# spaceID[0] holds the whole newline-separated ID list, so this expansion
	# is deliberately unquoted to split it into one ID per iteration.
	for sID in ${spaceID[@]}
	do
	   # Query string to get contentIDs
	   tCP="SELECT CONTENTID FROM confdb.CONTENT WHERE SPACEID=${sID}"

	   # c is the 1-based position of this space in the key and name lists.
	   # The key list is newline-separated, so it is echoed unquoted to join
	   # the keys with single spaces for cut. The pipes must be real pipes:
	   # escaped, echo would print the pipeline text instead of running cut.
	   tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
	   # The name list is '|'-separated; quoted so spaces and glob characters
	   # inside a space name survive unchanged.
	   tSN=$(printf '%s\n' "${spaceName[0]}" | cut -f"$c" -d "|")

	   # Get all the subcontent relating to this space.
	   contentID=$(mysql -u "$confUser" -p"$confPass" confdb -Bs -e "$tCP")

	   # Begin building the URLs (both exports use the same action URL; the
	   # export type is selected in the POST data).
	   URLP="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"
	   URLH="http://{server_name}:8080/confluence/spaces/doexportspace.action?key=$tURL"

	   POSTDATAP="type=TYPE_PDF"
	   POSTDATAH="type=TYPE_HTML"

	   # Append one contentToBeExported parameter per content ID.
	   # Unquoted on purpose: one ID per line of the query result.
	   for tID in $contentID
	   do
	       POSTDATAP="$POSTDATAP&contentToBeExported=${tID}"
	       POSTDATAH="$POSTDATAH&contentToBeExported=${tID}"
	   done

	   # Add the export options and the form confirmation field.
	   POSTDATAP="$POSTDATAP&includeComments=true&confirm=Export"
	   POSTDATAH="$POSTDATAH&includeComments=true&backupAttachments=true&confirm=Export"

	   # Log output for PDF URLs, and its wget output.
	   {
	      echo "PDF URL for $tURL: $URLP"
	      echo "PDF POST data for $tURL: $POSTDATAP"
	      echo ""
	      echo "wget for PDF: "
	   } >> "$exportLog"
	   wget --load-cookies cookies.txt --post-data "$POSTDATAP" -t 5 \
	      -O "$sRun/$tSN.pdf" -p "$URLP" 2>> "$exportLog"
	   echo "" >> "$exportLog"

	   # Log output for HTML URLs, and the wget output.
	   {
	      echo "HTML URL for $tURL: $URLH"
	      echo "HTML POST data for $tURL: $POSTDATAH"
	      echo ""
	      echo "wget for HTML: "
	   } >> "$exportLog"
	   wget --load-cookies cookies.txt --post-data "$POSTDATAH" -t 5 \
	      -O "$sRun/zips/$tURL.zip" -p "$URLH" 2>> "$exportLog"
	   echo "" >> "$exportLog"

	   c=$((c+1))
	done

	# Reset counter
	c=1

	# Second pass over the spaces: expand each downloaded HTML zip into a
	# directory named after the space, then delete the zip.
	unzipLog="$sRun/logs/${tDate}_output.log"

	# sID itself is not used here; the loop only needs one iteration per space
	# so that c walks the key and name lists in step. Unquoted on purpose,
	# because spaceID[0] holds the whole newline-separated ID list.
	for sID in ${spaceID[@]}
	do
	   # Key list is newline-separated; echoing it unquoted joins the keys
	   # with single spaces for cut. The pipe must be a real pipe, not an
	   # escaped one, or cut never runs.
	   tURL=$(echo ${spaceKey[0]} | cut -f"$c" -d " ")
	   # Name list is '|'-separated; quoted so the name is taken verbatim.
	   tSN=$(printf '%s\n' "${spaceName[0]}" | cut -f"$c" -d "|")
	   echo "$tURL"

	   # Expand zips to long directory names.
	   # Run unzip directly: wrapped in backticks, its file listing would be
	   # executed as a command and its own errors would never reach the log.
	   unzip -d "$sRun/zips/$tSN/" "$sRun/zips/$tURL.zip" 2>> "$unzipLog"
	   c=$((c+1))
	   rm -- "$sRun/zips/$tURL.zip"

	done

	# Push files to samba share.
	# The -c command list changes into "Confluence PDF Exports" on the share
	# and uploads every PDF from $sRun, then changes into its "HTML" directory
	# and uploads the contents of the local zips/ directory recursively.
	# Both mput commands need their wildcard: "mput *.pdf" and "mput *".
	# smbclient's error output is appended to this run's log.
	# NOTE(review): the password is passed as a command-line argument.
	smbclient \\\\{server}\\{path} -U {domain_username} -W {domain} {domain_password} \
	   -c "cd \"Confluence PDF Exports\"; prompt off; lcd \"$sRun\"; mput *.pdf; cd \"HTML\"; lcd \"zips\"; recurse; mput *" \
	   2>> "$sRun/logs/${tDate}_output.log"

	# Clean up files
	# ${sRun:?} aborts the script if sRun is unset or empty, so these globs
	# can never expand to /*.pdf or /zips/*. The paths are quoted up to the
	# glob so a working directory containing spaces is handled correctly.
	rm -rf -- "${sRun:?}"/*.pdf
	rm -rf -- "${sRun:?}"/zips/*