Last active
September 25, 2019 03:50
-
-
Save zhoulifu/63047a06e3c27e68200a5465475f7e2a to your computer and use it in GitHub Desktop.
An InputStream that reads an inline (single-line) XML string and emits it pretty-printed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
/**
 * An {@link InputStream} that serves a pretty-printed view of an "inline"
 * (single-line) XML string.
 *
 * <p>The XML text is encoded as UTF-8 and scanned byte by byte with a small state
 * machine. No XML parser is involved and well-formedness is not checked. While the
 * bytes are handed out through {@link #read()}, the stream inserts:
 * <ul>
 *   <li>a line break plus indentation between a tag and a directly following
 *       element tag or processing instruction;</li>
 *   <li>a line break after every {@code <? ... ?>} declaration;</li>
 *   <li>a final line break when the input ends right after a tag.</li>
 * </ul>
 * Text, comments and CDATA sections are passed through unchanged, and whitespace
 * that is already present in the input is never removed.
 *
 * <p>Instances keep mutable scanner state without any synchronization.
 */
public class PrettyXmlInputStream extends InputStream {

    private static final byte[] EMPTY_BYTES = new byte[0];
    private static final byte[] TAG_OPEN = new byte[]{'<'};
    private static final byte[] TAG_END = new byte[]{'>'};
    private static final byte[] CLOSE_TAG_PREFIX = new byte[]{'<', '/'};
    private static final byte[] CDATA_PREFIX = "[CDATA[".getBytes(StandardCharsets.US_ASCII);
    private static final byte[] CDATA_SUFFIX = new byte[]{']', '>'};
    private static final byte[] COMMENT_PREFIX = new byte[]{'-', '-'};
    private static final byte[] COMMENT_SUFFIX = new byte[]{'-', '>'};

    private static final int DEFAULT_TAB_SIZE = 1;
    private static final int DEFAULT_INDENT = 2;

    /** In-memory source; always a {@link ByteArrayInputStream}, so mark/reset is supported. */
    private final InputStream in;
    private final int tabSize;
    private final int indent;
    private final char indentChar;

    private State state = State.CONTENT;
    /** Number of elements that are currently open. */
    private int depth;
    /** Indent characters still to be emitted for the current line. */
    private int indentRemaining;
    /** Quote character of the attribute value being read, or 0 when outside a value. */
    private int attrQuote;

    /**
     * Creates a stream that indents every nesting level with two spaces.
     *
     * @param content the XML text to format
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public PrettyXmlInputStream(String content) {
        this(content, DEFAULT_TAB_SIZE, DEFAULT_INDENT, false);
    }

    /**
     * Creates a stream with a custom indentation.
     *
     * <p>With {@code useTabs} each nesting level is indented by {@code indent} tab
     * characters; otherwise by {@code tabSize * indent} spaces. A width that
     * evaluates to zero or less produces line breaks without indentation.
     *
     * @param content the XML text to format
     * @param tabSize multiplier applied to {@code indent} when indenting with spaces
     * @param indent  indentation units per nesting level
     * @param useTabs {@code true} to indent with tabs, {@code false} to use spaces
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public PrettyXmlInputStream(String content, int tabSize, int indent, boolean useTabs) {
        // Explicit charset: the output must not depend on the platform default. All
        // markup bytes the scanner looks for are ASCII, which never occurs inside a
        // UTF-8 multi-byte sequence.
        this.in = new ByteArrayInputStream(
                Objects.requireNonNull(content, "content").getBytes(StandardCharsets.UTF_8));
        this.tabSize = tabSize;
        this.indent = indent;
        this.indentChar = useTabs ? '\t' : ' ';
    }

    /**
     * Returns the next byte of the formatted document.
     *
     * @return the next source byte, an inserted {@code '\n'} or indent character,
     *         or {@code -1} once the source is exhausted
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        int c;
        byte[] ahead;
        switch (state) {
            case CARRIAGE_RETURN: // a line break was just emitted
                indentRemaining = indentWidth();
                state = State.INDENT;
                // fall through
            case INDENT:
                if (indentRemaining > 0) {
                    indentRemaining--;
                    return indentChar;
                }
                // Indentation done (or none due): continue through the regular content
                // transition so that a '<' read here is still recognised as a tag.
                return readContent();
            case OPEN_TAG:
                return readInsideTag(true);
            case MARKUP_DECL: // "<!" that is neither a comment nor CDATA, e.g. <!DOCTYPE ...>
                return readInsideTag(false);
            case CLOSE_TAG:
                c = in.read();
                if (c == '>') {
                    if (depth > 0) { // tolerate unbalanced input instead of going negative
                        depth--;
                    }
                    state = State.TAG_STOP;
                }
                return c;
            case EMPTY_TAG: // the '/' of "/>" was read; the look-ahead guarantees '>' is next
                c = in.read();
                state = State.TAG_STOP;
                return c;
            case TAG_START: // '<' read
                c = in.read();
                switch (c) {
                    case '/':
                        state = State.CLOSE_TAG;
                        break;
                    case '!':
                        if (aheadMatches(CDATA_PREFIX)) {
                            state = State.CDATA_START;
                        } else if (aheadMatches(COMMENT_PREFIX)) {
                            state = State.COMMENT_START;
                        } else {
                            state = State.MARKUP_DECL;
                        }
                        break;
                    case '?':
                        state = State.DECLARE_START;
                        break;
                    default:
                        state = State.OPEN_TAG;
                }
                return c;
            case TAG_STOP: // '>' of a tag read
                ahead = readAhead(2);
                if (ahead.length == 0) {
                    // End of input right after a tag: terminate the last line.
                    state = State.CONTENT;
                    return '\n';
                }
                if (ahead.length == 2 && ahead[0] == '<') {
                    if (ahead[1] == '!') {
                        // Comments and CDATA stay on the line of the preceding tag.
                        state = State.TAG_START;
                        return in.read();
                    }
                    state = State.CARRIAGE_RETURN;
                    return '\n';
                }
                return readContent();
            case COMMENT_START:
                c = in.read();
                if (c == '-' && aheadMatches(COMMENT_SUFFIX)) {
                    state = State.COMMENT_STOP;
                }
                return c;
            case COMMENT_STOP: // first '-' of "-->" read
                c = in.read();
                if (c == '>') {
                    ahead = readAhead(2);
                    boolean tagFollows = ahead.length == 2 && ahead[0] == '<'
                            && ahead[1] != '/' && ahead[1] != '!';
                    // Only an opening tag (or "<?") after a comment starts a new line.
                    state = tagFollows ? State.TAG_STOP : State.CONTENT;
                }
                return c;
            case DECLARE_START: // inside "<? ... ?>"
                c = in.read();
                if (c == '>') {
                    state = State.DECLARE_STOP;
                }
                return c;
            case DECLARE_STOP:
                // Always break the line after a declaration; indent when markup follows.
                state = aheadMatches(TAG_OPEN) ? State.CARRIAGE_RETURN : State.CONTENT;
                return '\n';
            case CDATA_START:
                c = in.read();
                if (c == ']' && aheadMatches(CDATA_SUFFIX)) {
                    state = State.CDATA_STOP;
                }
                return c;
            case CDATA_STOP: // first ']' of "]]>" read; the rest is plain content
            case CONTENT:
            default:
                return readContent();
        }
    }

    /** Reads one byte of character data; a {@code '<'} switches the scanner to tag handling. */
    private int readContent() throws IOException {
        int c = in.read();
        state = (c == '<') ? State.TAG_START : State.CONTENT;
        return c;
    }

    /**
     * Reads one byte from inside a start tag or a {@code <! ... >} declaration.
     *
     * <p>Quoted values are skipped verbatim, so a {@code '/'} or {@code '>'} inside a
     * value (for example a namespace URI) is never mistaken for markup.
     *
     * @param element {@code true} for an element start tag, which opens a nesting
     *                level unless it ends with {@code "/>"}
     */
    private int readInsideTag(boolean element) throws IOException {
        int c = in.read();
        if (attrQuote != 0) {
            if (c == attrQuote) {
                attrQuote = 0; // closing quote
            }
            return c;
        }
        if (c == '"' || c == '\'') {
            attrQuote = c;
        } else if (c == '>') {
            if (element) {
                depth++;
            }
            state = State.TAG_STOP;
        } else if (c == '/' && element && aheadMatches(TAG_END)) {
            state = State.EMPTY_TAG; // self-closing: nesting depth is unchanged
        }
        return c;
    }

    /** Computes how many indent characters the line that is about to start needs. */
    private int indentWidth() throws IOException {
        // A closing tag lines up with its opening tag, one level above its content.
        int level = aheadMatches(CLOSE_TAG_PREFIX) ? depth - 1 : depth;
        int unit = (indentChar == '\t') ? indent : tabSize * indent;
        return Math.max(0, level) * Math.max(0, unit);
    }

    /** Tells whether the upcoming bytes equal {@code expected}, without consuming them. */
    private boolean aheadMatches(byte[] expected) throws IOException {
        in.mark(expected.length);
        try {
            for (byte b : expected) {
                if (in.read() != b) {
                    return false;
                }
            }
            return true;
        } finally {
            in.reset();
        }
    }

    /**
     * Returns up to {@code limit} upcoming bytes without consuming them.
     *
     * @return the bytes that follow; shorter than {@code limit} near the end of the
     *         input and empty at the end
     */
    private byte[] readAhead(int limit) throws IOException {
        int len = Math.min(limit, in.available());
        if (len <= 0) {
            return EMPTY_BYTES;
        }
        in.mark(len);
        try {
            byte[] buf = new byte[len];
            int n = in.read(buf, 0, len);
            if (n == len) {
                return buf;
            }
            if (n <= 0) {
                return EMPTY_BYTES;
            }
            byte[] part = new byte[n];
            System.arraycopy(buf, 0, part, 0, n);
            return part;
        } finally {
            in.reset();
        }
    }

    /** Scanner states; each names what has been recognised so far. */
    private enum State {
        TAG_START,
        TAG_STOP,
        DECLARE_START,
        DECLARE_STOP,
        COMMENT_START,
        COMMENT_STOP,
        CDATA_START,
        CDATA_STOP,
        OPEN_TAG,
        CLOSE_TAG,
        EMPTY_TAG,
        MARKUP_DECL,
        CARRIAGE_RETURN,
        INDENT,
        CONTENT
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment