Last active
August 29, 2015 14:01
explode and reformat docx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Identity transform: every attribute and every child node of the input is
  copied to the output unchanged. The result is serialized as indented
  UTF-8 XML (see xsl:output below).
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:fo="http://www.w3.org/1999/XSL/Format">

  <xsl:output indent="yes" method="xml" encoding="UTF-8"/>

  <!--
    node() matches elements, text, comments and processing instructions,
    so together with @* this single rule copies everything.
  -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# This script unpacks a docx file into a directory and reformats its XML parts.
#
# Arguments: [-r] name
#   -r    use remove-rsid.xslt instead of copy.xslt as the transform
#   name  the word document; "name", "name.docx" and "name.xml" all refer
#         to the archive "name.docx"
#
# You need an XSLT processor (Transform) in your path, e.g.
#   /c/Program Files/Saxonica/SaxonHE9.4N/bin/Transform
#
# Make sure remove-rsid.xslt and copy.xslt are copied next to this script;
# they are looked up in the directory the script lives in.

# Start from a known value so an inherited environment variable cannot
# silently switch the rsid removal on.
remove_rsid=0
if [ "$1" = "-r" ]; then
    remove_rsid=1
    shift
fi

if [ -z "$1" ]; then
    echo expected name of the word document to be exploded >&2
    exit 1
fi

# Derive the archive name and its suffix from the argument.
# Only POSIX parameter expansion is used, so this works under any /bin/sh.
case "$1" in
    *.xml)
        # name.xml stands for the archive name.docx
        suffix=docx
        name="${1%.xml}.docx"
        ;;
    *.*)
        suffix=${1##*.}
        name=$1
        ;;
    *)
        # no extension given: assume .docx
        suffix=docx
        name="$1.docx"
        ;;
esac

# Directory name for the exploded document: archive name without path and suffix.
corename=$(basename "$name" ".$suffix")
if [ -z "$corename" ]; then
    echo can not work with empty name >&2
    exit 1
fi

# Directory of this script, and the same path in mixed (forward-slash
# Windows) form for the Transform processor.
DIR="$( cd "$( dirname "$0" )" && pwd )"
DOSDIR=$(cygpath -m "$DIR")

# Per-process scratch directories: transform input and transform output.
FLAT=/tmp/flat.$$
FLATOUT=$FLAT.out

if [ "$remove_rsid" = "1" ]; then
    transform=$DOSDIR/remove-rsid.xslt
else
    transform=$DOSDIR/copy.xslt
fi
# Formats a file as XML, in place.
#
# $1 - file name
#
# The file name is echoed first. xmllint writes the formatted document to
# "$1.new", which replaces the original only when xmllint succeeded, so a
# document that fails to parse is left untouched.
# Returns non-zero when xmllint (or the final mv) fails.
_reformat_xml() {
    echo "$1"
    if xmllint --format -o "$1.new" "$1"; then
        mv "$1.new" "$1"
    else
        # do not leave a partial result behind
        rm -f "$1.new"
        return 1
    fi
}
# Moves every *.xml and *.rels file found below the current directory into
# the single directory $FLAT.
#
# The relative path of each file is kept inside its flat name by replacing
# every '/' with 'Ø'. *.rels files additionally get ".xml" appended.
#
# Sets the globals $xmls and $rels to space separated lists of the flat
# names that were moved; expand_dirs uses them to restore the files.
#
# Limitation: the find output is split on whitespace, so paths containing
# blanks are not supported.
# NOTE(review): 'Ø' is a multi-byte character in UTF-8; confirm that tr
# round-trips it in the locale this script runs in.
flaten() {
    # xml
    xmls=""
    for f in $(find . -name '*.xml'); do
        # Quoted expansions keep names such as [Content_Types].xml from
        # being treated as glob patterns.
        ff=$(printf '%s\n' "${f#./}" | tr '/' 'Ø')
        # Only remember files that really were moved.
        mv "$f" "$FLAT/$ff" && xmls="$xmls $ff"
    done

    # for rels, rename into .xml
    rels=""
    for f in $(find . -name '*.rels'); do
        ff=$(printf '%s\n' "${f#./}" | tr '/' 'Ø')
        mv "$f" "$FLAT/$ff.xml" && rels="$rels $ff.xml"
    done
}
# Moves the files listed in $rels and $xmls from $FLATOUT back below the
# current directory, turning every 'Ø' in a flat name back into '/'.
# For the $rels entries the trailing ".xml" is stripped first.
#
# Returns non-zero without moving anything when $FLATOUT cannot be entered;
# otherwise the working directory is restored before returning.
expand_dirs() {
    target_dir=$(pwd)
    # Without this check a failed cd would let the mv commands below run
    # in the wrong directory.
    cd "$FLATOUT" || return 1

    for f in $rels; do
        ff=$(printf '%s\n' "${f%.xml}" | tr 'Ø' '/')
        mv "$f" "$target_dir/$ff"
    done

    for f in $xmls; do
        ff=$(printf '%s\n' "$f" | tr 'Ø' '/')
        mv "$f" "$target_dir/$ff"
    done

    cd "$target_dir"
}
# Main sequence: back up an existing exploded directory, unzip the archive
# into a fresh one, run all XML parts through the selected transform and
# put the results back in place.

# Check for the archive before touching an existing directory or its backup.
if [ ! -f "$name" ]; then
    echo "can not find $name" >&2
    exit 1
fi

# The archive is opened from inside "$corename", so a relative name has to
# be reached through "..". An absolute name is used as it is.
case "$name" in
    /*|[A-Za-z]:[\\/]*) archive=$name ;;
    *)                  archive="../$name" ;;
esac

if [ -e "$corename" ]; then
    # Keep a single backup generation: drop the old one, then move the
    # current directory aside.
    rm -rf "$corename.bak"
    mv "$corename" "$corename.bak" || exit 1
fi

# Stop on failure here: continuing after a failed cd would make flaten
# move XML files out of the wrong directory.
mkdir "$corename" || exit 1
cd "$corename" || exit 1

unzip -q "$archive"
unzip_status=$?
# unzip reports 1 for warnings where extraction still completed.
if [ "$unzip_status" -gt 1 ]; then
    exit "$unzip_status"
fi

# Fresh scratch directories for the transform input and output.
rm -rf "$FLAT" "$FLATOUT"
mkdir "$FLAT" "$FLATOUT" || exit 1

flaten

# Transform gets mixed-form paths; $FLATOUT is "$FLAT.out", so the output
# directory is the converted input path plus ".out".
dosflat=$(cygpath -m "$FLAT")
Transform "-xsl:$transform" "-s:$dosflat" "-o:$dosflat.out"
transform_status=$?

# Move back whatever was produced, clean up, and report a transform failure
# through the exit status.
expand_dirs
rm -rf "$FLAT" "$FLATOUT"
exit "$transform_status"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the input document while dropping the unwanted attributes and
  elements listed at the bottom. The result is serialized as indented
  UTF-8 XML.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:fo="http://www.w3.org/1999/XSL/Format"
                xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

  <xsl:output indent="yes" method="xml" encoding="UTF-8"/>

  <!-- Default rule: copy every attribute and node that no rule below matches -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Dropped: w:rsid elements and the w:rsid* attributes (empty template, no output) -->
  <xsl:template match="w:rsid
                       | @w:rsidDel
                       | @w:rsidP
                       | @w:rsidR
                       | @w:rsidRDefault
                       | @w:rsidRPr
                       | @w:rsidSect
                       | @w:rsidTr"/>

  <!-- Dropped: w:proofErr and w:lang elements -->
  <xsl:template match="w:proofErr | w:lang"/>

</xsl:stylesheet>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment