Apache Tika をインストールし、/usr/local/bin/parsexml を以下の内容で作成する。
/usr/local/bin/parsexml
#!/usr/bin/env bash
# git textconv helper: writes the plain-text content of a document to stdout
# so that `git diff` can compare it as text.
#   $1 - path of the file to convert
# The exit status is whatever tika returns.
document_path=$1
tika --text "${document_path}"
$ chmod +x /usr/local/bin/parsexml
$ git config --global --add diff.parsexml.textconv /usr/local/bin/parsexml
${repo_root}/.gitattributes
*.pptx diff=parsexml
*.docx diff=parsexml
*.xlsx diff=parsexml
旧形式の .doc 用には catdoc をインストールして、以下のように設定する。
$ git config --global --add diff.parseoldoffice.textconv catdoc
${repo_root}/.gitattributes
*.doc diff=parseoldoffice