Skip to content

Instantly share code, notes, and snippets.

@hoijui
Created May 20, 2019 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hoijui/922f5f564ee2d27c078526fd24f2b75b to your computer and use it in GitHub Desktop.
Save hoijui/922f5f564ee2d27c078526fd24f2b75b to your computer and use it in GitHub Desktop.
fast, stream-based XML pretty-printer (Java 8, LGPL v3+)
/*
* Copyright (C) 2019, Robin Vobruba <hoijui.quaero@gmail.com>.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
/**
 * Takes XML content as input,
 * and reproduces the same content as output,
 * but more pleasing to the human eye,
 * by adding proper line-endings and indents.
 *
 * This implementation is:
 * + fast
 * + memory efficient
 * + uses no external dependencies
 * + handles invalid XML gracefully
 * + byte-transparent: bytes other than ASCII white-space are passed through
 *   unchanged, so any ASCII-compatible encoding (UTF-8, ISO-8859-x, ...) survives
 * - may produce strange results if there are '{@literal <}' or '{@literal >}' characters
 *   elsewhere than in tags (for example inside comments or CDATA sections)
 */
@SuppressWarnings("WeakerAccess")
public class XmlFastRoughAndPrettyFormatter {

    private final String indent;

    /**
     * Creates an instance with specific values.
     *
     * @param indent what string to use for one indent
     *   (this might be two spaces or one TAB, for example);
     *   when formatting through {@link #prettifyRoughAndFast}, it is written
     *   with a single-byte encoding, so it should consist of ASCII white-space
     */
    public XmlFastRoughAndPrettyFormatter(final String indent) {
        this.indent = indent;
    }

    /**
     * Creates an instance with default values.
     */
    public XmlFastRoughAndPrettyFormatter() {
        this(" ");
    }

    /**
     * Command line entry point.
     * With no arguments, formats stdin to stdout;
     * with one argument, formats the given file to stdout;
     * with two arguments, formats the first file into the second one.
     * Any other number of arguments prints the usage and exits with status 1.
     *
     * @param args the command line arguments
     * @throws IOException if any input or output fails
     */
    public static void main(final String[] args) throws IOException {
        final XmlFastRoughAndPrettyFormatter formatter = new XmlFastRoughAndPrettyFormatter();
        if (0 == args.length) {
            formatter.prettify(System.in, System.out, createBuffer());
        } else if (1 == args.length) {
            try (final InputStream source = new FileInputStream(args[0])) {
                formatter.prettify(source, System.out, createBuffer());
            }
        } else if (2 == args.length) {
            try (final InputStream source = new FileInputStream(args[0]);
                    final OutputStream target = new FileOutputStream(args[1]))
            {
                formatter.prettify(source, target, createBuffer());
            }
        } else {
            final String name = XmlFastRoughAndPrettyFormatter.class.getSimpleName();
            System.err.println("Usage:");
            System.err.printf("\t%s in-file.xml out-file.xml%n", name);
            System.err.printf("\t%s in-file.xml > out-file.xml%n", name);
            System.err.printf("\t%s < in-file.xml > out-file.xml%n", name);
            System.exit(1);
        }
    }

    private static byte[] createBuffer() {
        return new byte[2048];
    }

    /**
     * Rejects buffers that can never receive data.
     * Reading into a zero-length array always returns 0,
     * which would make a copy loop spin forever.
     */
    private static void requireUsableBuffer(final byte[] buffer) {
        if (buffer.length == 0) {
            throw new IllegalArgumentException("buffer must have a length of at least 1");
        }
    }

    /**
     * Reformats XML content to be easy on the human eye.
     * If pretty-printing fails, the stack trace is printed to stderr,
     * and the not yet consumed rest of the input is copied to the output as-is.
     *
     * @param xmlIn the supplier of XML content to pretty-print
     * @param xmlOut where the pretty XML content shall be written to
     * @param buffer may be used internally for whatever in- or out-buffering there might be;
     *   must not be empty
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has length zero
     */
    public void prettify(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer)
            throws IOException {
        requireUsableBuffer(buffer);
        try {
            prettifyRoughAndFast(xmlIn, xmlOut, buffer);
        } catch (final Exception ex) {
            ex.printStackTrace(System.err);
            // In case of failure of pretty-printing, use the (remaining) XML as-is
            transferTo(xmlIn, xmlOut, buffer);
        }
    }

    /**
     * Copies input content to output.
     * The same like Java 9's {@code InputStream#transferTo(OutputStream)}.
     *
     * @param source the source of the data
     * @param target where the source data should be copied to
     * @param buffer the buffer to use for transferring;
     *   no more than {@code buffer.length} bytes are read at a time;
     *   must not be empty
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has length zero
     */
    public static void transferTo(final InputStream source, final OutputStream target, final byte[] buffer)
            throws IOException
    {
        requireUsableBuffer(buffer);
        for (int n = source.read(buffer); n >= 0; n = source.read(buffer)) {
            target.write(buffer, 0, n);
        }
    }

    /**
     * Pretty-prints the XML read from {@code xmlIn} to {@code xmlOut}, chunk by chunk.
     * The content is cut into rows at every line-break, before every '{@literal <}'
     * and after every '{@literal >}'; each non-empty row is then written on its own line,
     * indented according to the current nesting depth.
     * The output is flushed at the end, but {@code xmlOut} is not closed.
     *
     * @param xmlIn the supplier of XML content to pretty-print
     * @param xmlOut where the pretty XML content shall be written to
     * @param buffer the read buffer; must not be empty
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has length zero
     */
    public void prettifyRoughAndFast(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer)
            throws IOException {
        requireUsableBuffer(buffer);
        // this is a kind of stack, denoting the number of indents
        int numIndents = 0;
        // holds the trailing, potentially incomplete row of the previous chunk
        final StringBuilder inBuffer = new StringBuilder();
        // ISO-8859-1 maps every byte to exactly one char and back.
        // Decoding and encoding with it makes the processing byte-transparent,
        // so a multi-byte character that is split between two reads
        // is not corrupted, as it would be when decoding each chunk as UTF-8.
        final PrintStream xmlOutPrinter
                = new PrintStream(xmlOut, false, StandardCharsets.ISO_8859_1.name());
        for (int readBytes = xmlIn.read(buffer); readBytes >= 0; readBytes = xmlIn.read(buffer)) {
            // convert the newly read part to a string
            // and append it to the leftover, which was already read
            inBuffer.append(new String(buffer, 0, readBytes, StandardCharsets.ISO_8859_1));
            // Split all the content we have at the moment into rows (think: lines).
            // The limit of -1 keeps trailing empty rows; without it,
            // a line-break at the very end of a chunk would be lost
            // (gluing two text lines together),
            // and a chunk consisting of line-breaks only would yield zero rows.
            final String[] rows = inBuffer.toString()
                    .replace(">", ">\n")
                    .replace("<", "\n<")
                    .split("\n", -1);
            final int lastRow = rows.length - 1;
            // handle all except the last row,
            // because it is potentially incomplete
            for (int ir = 0; ir < lastRow; ir++) {
                numIndents = handleRow(xmlOutPrinter, rows[ir].trim(), numIndents);
            }
            // fill the buffer with only the last row,
            // which is potentially incomplete
            inBuffer.setLength(0);
            inBuffer.append(rows[lastRow]);
        }
        // handle the last row
        handleRow(xmlOutPrinter, inBuffer.toString().trim(), numIndents);
        // PrintStream swallows IOExceptions; checkError() flushes and reports them
        if (xmlOutPrinter.checkError()) {
            throw new IOException("Failed to write the pretty-printed XML to the output");
        }
    }

    private static void appendIndents(final PrintStream output, final int numIndents, String indent) {
        for (int ii = 0; ii < numIndents; ii++) {
            output.append(indent);
        }
    }

    /**
     * Checks whether a row is a tag that opens a new nesting level.
     * Closing tags, processing instructions, declarations/comments/CDATA
     * ({@code <!...}), self-closing tags and rows ending a CDATA section do not.
     */
    private static boolean opensElement(final String row) {
        return row.startsWith("<")
                && !row.startsWith("</")
                && !row.startsWith("<?")
                && !row.startsWith("<!")
                && !row.endsWith("/>")
                && !row.endsWith("]]>");
    }

    /**
     * Writes a single, already trimmed row on its own line, with the fitting indent.
     * <ul>
     * <li>empty rows are skipped</li>
     * <li>rows starting with {@code <?} are written without any indent</li>
     * <li>closing tags decrease the depth (never below zero), and are written at the new depth</li>
     * <li>opening tags are written at the current depth, and increase it afterwards,
     *   so they line up with their closing tag</li>
     * <li>everything else (text, self-closing tags, comments, DOCTYPE, CDATA)
     *   is written at the current depth</li>
     * </ul>
     *
     * @param xmlOut where to write the row to
     * @param row the trimmed row to write
     * @param numIndents the nesting depth before this row
     * @return the nesting depth after this row
     */
    public int handleRow(final PrintStream xmlOut, final String row, int numIndents) {
        if (row.isEmpty()) {
            return numIndents;
        }
        if (row.startsWith("<?")) {
            xmlOut.append(row).append("\n");
        } else if (row.startsWith("</")) {
            // a stray closing tag must not push the depth below zero
            numIndents = Math.max(0, numIndents - 1);
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
        } else if (opensElement(row)) {
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
            numIndents++;
        } else {
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
        }
        return numIndents;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment