Skip to content

Instantly share code, notes, and snippets.

@ezhov-da
Last active March 10, 2019 12:23
Show Gist options
  • Save ezhov-da/e212de7dc017461e399fd38549762097 to your computer and use it in GitHub Desktop.
Save ezhov-da/e212de7dc017461e399fd38549762097 to your computer and use it in GitHub Desktop.
groovy pdf wpc
<pre>
// Gradle build configuration fragment.
// NOTE(review): these two lines look like an excerpt of build.gradle rather than
// part of the Groovy script below; a dependency declaration normally lives inside
// a `dependencies { }` block — confirm against the real build file.
// Applies the Gradle 'idea' plugin.
apply plugin: 'idea'
// Declares Apache PDFBox 2.0.6 in the `compile` configuration.
compile group: 'org.apache.pdfbox', name: 'pdfbox', version: '2.0.6'
package ru.ezhov.groovy.pdf
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.text.PDFTextStripper
import org.apache.pdfbox.text.PDFTextStripperByArea
import java.util.regex.Matcher
import java.util.regex.Pattern
// Reads the text of a PDF with PDFBox, drops unwanted lines, indents every line
// that is not listed in `stringSet`, prints each kept line as it goes and then
// prints the whole accumulated result.
//
// NOTE(review): `stringSet` is not defined anywhere in the visible script.
// Presumably it is a collection of root-level lines that must stay unindented and
// is declared elsewhere (or supplied via the script binding) — TODO confirm.
PDDocument document =
        PDDocument.load(new File("E:/MDM/WPC_ScriptingReference_5320if2_03.pdf"))
try {
    if (!document.isEncrypted()) {
        PDFTextStripper tStripper = new PDFTextStripper()
        tStripper.setAddMoreFormatting(true)
        String pdfFileInText = tStripper.getText(document)
        String[] lines = pdfFileInText.split("\\r?\\n")
        // A line that begins with one or more digits is treated as junk below.
        Pattern pattern = Pattern.compile("^\\d+")
        StringBuilder stringBuilder = new StringBuilder()
        for (String line : lines) {
            Matcher matcher = pattern.matcher(line)
            // Clean out the junk: skip lines that start with "WebSphere Product"
            // and lines that start with digits.
            if (!line.startsWith("WebSphere Product") && !matcher.find()) {
                // Add indentation for non-root elements.
                String resultLine = ""
                if (stringSet.contains(line)) {
                    resultLine = line
                } else {
                    // `line` came from split("\\r?\\n"), so it cannot contain "\r\n":
                    // this split always yields exactly one element. The multi-part
                    // branches are kept for fidelity with the original formatting.
                    String[] linesTab = line.split("\\r\\n")
                    if (linesTab.length > 2) {
                        resultLine = resultLine + "\t" + linesTab[0] + "\n"
                        resultLine = resultLine + "\t\t" + linesTab[1] + "\n"
                        // NOTE(review): repeats linesTab[0]; linesTab[2] may have
                        // been intended — confirm before changing.
                        resultLine = resultLine + "\t" + linesTab[0]
                    } else if (linesTab.length == 2) {
                        resultLine = resultLine + "\t" + linesTab[0] + "\n"
                        resultLine = resultLine + "\t\t" + linesTab[1] + "\n"
                    } else if (linesTab.length == 1) {
                        // Previously this case read linesTab[1] and failed with
                        // ArrayIndexOutOfBoundsException; indent the single part.
                        resultLine = resultLine + "\t" + linesTab[0]
                    }
                }
                System.out.println(resultLine)
                stringBuilder.append(resultLine)
                stringBuilder.append("\n")
            }
        }
        System.out.println(stringBuilder)
        // Disabled: optional dump of the result to a file.
        // try (FileWriter fileWriter = new FileWriter(new File("E:/script-tabbed.txt"));) {
        // fileWriter.write(stringBuilder.toString());
        // }
    }
} finally {
    // Always release the PDF, even if text extraction or processing fails.
    document.close()
}
</pre>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment