Skip to content

Instantly share code, notes, and snippets.

Created June 2, 2016 17:49
Show Gist options
  • Save anonymous/ddd6f2206e2cf05098d5740c54fbe9d9 to your computer and use it in GitHub Desktop.
Save anonymous/ddd6f2206e2cf05098d5740c54fbe9d9 to your computer and use it in GitHub Desktop.
Apache POI: Paragraph and table contents from a Microsoft Word file (docx) using Java and Maven
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFTable;
/**
 * Prints the text of the paragraphs and tables found among a .docx document's
 * body elements, in the order in which they appear in the document.
 */
public class ExtractTestCases {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "path_to_file.docx";

    /**
     * Opens the document and prints each paragraph and table exactly once.
     *
     * @param args optional; {@code args[0]} is the path of the .docx file to read.
     *             When absent, {@code "path_to_file.docx"} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        FileInputStream fis = null;
        XWPFDocument xdoc = null;
        try {
            fis = new FileInputStream(path);
            xdoc = new XWPFDocument(OPCPackage.open(fis));
            Iterator<IBodyElement> bodyElementIterator = xdoc.getBodyElementsIterator();
            while (bodyElementIterator.hasNext()) {
                IBodyElement element = bodyElementIterator.next();
                // Print only the element currently being visited. Asking the element's
                // body for getParagraphs()/getTables() returns every paragraph/table of
                // that body, which would repeat the whole list once per element.
                if (element instanceof XWPFParagraph) {
                    XWPFParagraph para = (XWPFParagraph) element;
                    System.out.println("Para Contents:" + para.getText());
                } else if (element instanceof XWPFTable) {
                    XWPFTable table = (XWPFTable) element;
                    System.out.println(table.getText());
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // try/finally rather than try-with-resources: the build targets Java 1.6.
            closeQuietly(xdoc);
            closeQuietly(fis);
        }
    }

    /** Closes the resource if it is non-null, reporting (not propagating) any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException ex) {
            ex.printStackTrace();
        }
    }
}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven build descriptor for the ApachePOI example; the artifact is packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.test</groupId>
<artifactId>ApachePOI</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>ApachePOI</name>
<url>http://maven.apache.org</url>
<dependencies>
<!-- JUnit is limited to the test scope, so it is not part of the packaged jar's runtime dependencies. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- Apache POI artifacts below are all pinned to the same pre-release version (3.15-beta1); keep them in sync when upgrading. -->
<!-- http://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15-beta1</version>
</dependency>
<!-- http://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.15-beta1</version>
</dependency>
<!-- NOTE(review): commons-logging, commons-codec and log4j are not imported by the ExtractTestCases source in this gist; presumably they are runtime dependencies of POI. Confirm before removing. -->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile with Java 1.6 source and target levels; language features newer than Java 6 will not compile under this setting. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment