Skip to content

Instantly share code, notes, and snippets.

@zhoulifu
Last active September 25, 2019 03:50
Show Gist options
  • Save zhoulifu/63047a06e3c27e68200a5465475f7e2a to your computer and use it in GitHub Desktop.
Save zhoulifu/63047a06e3c27e68200a5465475f7e2a to your computer and use it in GitHub Desktop.
An InputStream which reads an inline XML string and emits it formatted
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Objects;
/**
 * An {@link InputStream} that re-emits an XML string with line breaks and indentation
 * inserted between tags.
 *
 * <p>The source text is scanned one byte at a time by a small state machine (see
 * {@link State}); no XML parser is involved and the input is not validated beyond a
 * few structural checks. The string is encoded with {@link String#getBytes()}, i.e. the
 * platform default charset, and all markup detection is done on ASCII bytes.
 */
public class PrettyXmlInputStream extends InputStream {
    private static final byte[] EMPTY_BYTES = new byte[0];
    private static final byte[] CLOSE_TAG_PREFIX = {'<', '/'};
    // Spelled out as bytes so the constant does not depend on the platform charset.
    private static final byte[] CDATA_PREFIX = {'[', 'C', 'D', 'A', 'T', 'A', '['};
    private static final byte[] CDATA_SUFFIX = {']', '>'};
    private static final byte[] COMMENT_PREFIX = {'-', '-'};
    private static final byte[] COMMENT_SUFFIX = {'-', '>'};
    private static final int DEFAULT_TAB_SIZE = 1;
    private static final int DEFAULT_INDENT = 2;
    /** Marker for "not currently inside a quoted attribute value". */
    private static final int NO_QUOTE = 0;

    /** Zero-based index of the last byte consumed from the source; -1 before the first read. */
    private long readIdx = -1;
    private State state = State.CONTENT;
    private final int tabSize;
    private final int indent;
    private final char indentChar;
    /** Current nesting level used to compute the indentation of the next line. */
    private int depth;
    /** Indent characters still to be emitted for the current line. */
    private int indentRemaining;
    /** Quote character (' or ") of the attribute value being read, or {@link #NO_QUOTE}. */
    private int attrQuote = NO_QUOTE;
    private final InputStream in;

    /**
     * Creates a stream that indents with spaces using the default tab size (1) and
     * indent (2), i.e. two spaces per nesting level.
     *
     * @param content the XML text to format; must not be {@code null}
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public PrettyXmlInputStream(String content) {
        this(content, DEFAULT_TAB_SIZE, DEFAULT_INDENT, false);
    }

    /**
     * Creates a stream with explicit indentation settings.
     *
     * <p>With {@code useTabs} each nesting level is indented by {@code indent} tab
     * characters; otherwise by {@code tabSize * indent} spaces.
     *
     * @param content the XML text to format; must not be {@code null}
     * @param tabSize multiplier applied to {@code indent} when indenting with spaces
     * @param indent  indent units per nesting level
     * @param useTabs {@code true} to indent with {@code '\t'}, {@code false} for spaces
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public PrettyXmlInputStream(String content, int tabSize, int indent, boolean useTabs) {
        this.in = new ByteArrayInputStream(
                Objects.requireNonNull(content, "content").getBytes());
        this.tabSize = tabSize;
        this.indent = indent;
        this.indentChar = useTabs ? '\t' : ' ';
    }

    /**
     * Returns the next byte of the formatted output.
     *
     * <p>Depending on the current state this is either the next source byte or an
     * inserted line feed / indent character.
     *
     * @return the next output byte, or {@code -1} once the source is exhausted
     * @throws IOException           if the underlying stream fails
     * @throws IllegalStateException if an unquoted {@code '/'} inside an opening tag is
     *                               not immediately followed by {@code '>'}
     */
    @Override
    public int read() throws IOException {
        int c;
        byte[] ahead;
        switch (state) {
            case CARRIAGE_RETURN: // a line feed was just emitted; work out the indent
                state = State.INDENT;
                // A closing tag lines up with its opening tag, so only go one level
                // deeper when the upcoming tag is not "</...".
                if (!aheadMatches(CLOSE_TAG_PREFIX)) {
                    depth++;
                }
                int unitWidth = (indentChar == '\t') ? indent : tabSize * indent;
                indentRemaining = depth * unitWidth;
                // fall through
            case INDENT:
                if (indentRemaining > 1) {
                    indentRemaining--;
                    return indentChar;
                }
                state = State.CONTENT;
                // NOTE(review): when the indent width is zero the next source byte is
                // returned from here with state CONTENT, so a '<' read at this point does
                // not start tag tracking. Kept as-is to preserve existing output.
                return (indentRemaining > 0) ? indentChar : _read();
            case OPEN_TAG:
                c = _read();
                if (attrQuote != NO_QUOTE) {
                    // Inside a quoted attribute value: '/' and '>' are plain data here
                    // (e.g. href="http://x/y"); only the matching quote ends the value.
                    if (c == attrQuote) {
                        attrQuote = NO_QUOTE;
                    }
                    return c;
                }
                switch (c) {
                    case '"':
                    case '\'':
                        attrQuote = c;
                        break;
                    case '/':
                        depth--;
                        state = State.EMPTY_TAG;
                        break;
                    case '>':
                        state = State.TAG_STOP;
                        break;
                    default:
                        break;
                }
                //TODO is valid tagName?
                return c;
            case CLOSE_TAG:
                c = _read();
                if (c == '>') {
                    depth--;
                    state = State.TAG_STOP;
                }
                //TODO is valid tagName?
                return c;
            case EMPTY_TAG: // '/' read inside an opening tag; '>' must follow
                c = _read();
                if (c != '>') {
                    throw new IllegalStateException("Invalid character '" + (char)c + "' at " + readIdx);
                }
                state = State.TAG_STOP;
                return c;
            case TAG_START: // '<' read
                c = _read();
                switch (c) {
                    case '/':
                        state = State.CLOSE_TAG;
                        break;
                    case '!':
                        if (aheadMatches(CDATA_PREFIX)) {
                            state = State.CDATA_START;
                        } else if (aheadMatches(COMMENT_PREFIX)) {
                            state = State.COMMENT_START;
                        } else {
                            // Other "<!" constructs are scanned like an opening tag.
                            state = State.OPEN_TAG;
                        }
                        break;
                    case '?':
                        state = State.DECLARE_START;
                        break;
                    default:
                        state = State.OPEN_TAG;
                        break;
                }
                return c;
            case TAG_STOP: // '>' read
                //FIXME carriage return strategy
                ahead = readAhead(2);
                if (ahead.length < 2) {
                    // Fewer than two bytes left: finish the line and drain as content.
                    state = State.CONTENT;
                    return '\n';
                }
                if (ahead[0] == '<') {
                    if (ahead[1] == '!') {
                        // Comment/CDATA directly after a tag stays on the same line.
                        state = State.TAG_START;
                        return _read();
                    }
                    state = State.CARRIAGE_RETURN;
                    return '\n';
                }
                // Text follows the tag: keep it on the same line.
                state = State.CONTENT;
                return _read();
            case COMMENT_START:
                c = _read();
                if (c == '-' && aheadMatches(COMMENT_SUFFIX)) {
                    state = State.COMMENT_STOP;
                }
                return c;
            case COMMENT_STOP: // '-' read, "->" still pending
                c = _read();
                if (c == '>') {
                    ahead = readAhead(2);
                    // Only an opening tag after the comment gets a line break; the
                    // TAG_STOP state produces it on the next read.
                    if (ahead.length == 2 && ahead[0] == '<' && ahead[1] != '/' && ahead[1] != '!') {
                        state = State.TAG_STOP;
                    } else {
                        state = State.CONTENT;
                    }
                }
                return c;
            case DECLARE_START:
                c = _read();
                if (c == '>') {
                    state = State.DECLARE_STOP;
                }
                return c;
            case DECLARE_STOP:
                // A "<?...>" declaration is always followed by a line break.
                state = State.CONTENT;
                return '\n';
            case CDATA_START:
                c = _read();
                if (c == ']' && aheadMatches(CDATA_SUFFIX)) {
                    state = State.CDATA_STOP;
                }
                return c;
            case CDATA_STOP: // ']' read, "]>" still pending; emitted like content
            case CONTENT:
            default:
                c = _read();
                if (c == '<') {
                    state = State.TAG_START;
                }
                return c;
        }
    }

    /** Consumes one byte from the source and advances {@link #readIdx}. */
    private int _read() throws IOException {
        readIdx++;
        return in.read();
    }

    /**
     * Tests whether the upcoming source bytes equal {@code expected} without consuming
     * them (the stream is marked before and reset after the comparison).
     */
    private boolean aheadMatches(byte[] expected) throws IOException {
        try {
            in.mark(expected.length);
            boolean matches = true;
            for (int i = 0; (i < expected.length) && matches; i++) {
                matches = (expected[i] == in.read());
            }
            return matches;
        } finally {
            in.reset();
        }
    }

    /**
     * Returns up to {@code limit} upcoming source bytes without consuming them. The
     * result is shorter than {@code limit} when fewer bytes are available.
     */
    @SuppressWarnings("all")
    private byte[] readAhead(int limit) throws IOException {
        try {
            int len = Math.min(limit, in.available());
            in.mark(len);
            if (len == 0) {
                return EMPTY_BYTES;
            }
            final byte[] buf = new byte[len];
            in.read(buf, 0, len);
            return buf;
        } finally {
            in.reset();
        }
    }

    /** Scanner states; each names what has just been read or is being read. */
    private enum State {
        TAG_START,
        TAG_STOP,
        DECLARE_START,
        DECLARE_STOP,
        COMMENT_START,
        COMMENT_STOP,
        CDATA_START,
        CDATA_STOP,
        OPEN_TAG,
        CLOSE_TAG,
        EMPTY_TAG,
        CARRIAGE_RETURN,
        INDENT,
        CONTENT
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment