Skip to content

Instantly share code, notes, and snippets.

@Miha-x64
Last active December 22, 2022 14:03
Show Gist options
  • Save Miha-x64/4ed50f1d5593e45a452efbf456aa1db4 to your computer and use it in GitHub Desktop.
Save Miha-x64/4ed50f1d5593e45a452efbf456aa1db4 to your computer and use it in GitHub Desktop.
HTML/XML escaping utils, answering https://stackoverflow.com/a/61215915/3050249
import java.io.IOException;
import java.io.UncheckedIOException;
/**
 * XML escaping utils.
 *
 * <p>Each public method copies {@code content} into an {@link Appendable}, replacing the
 * characters that are unsafe in the given context with entity or numeric character references.
 * Unescaped runs are appended as slices of the original {@link CharSequence}, so no
 * intermediate strings are created.
 *
 * <p>Source: https://gist.github.com/Miha-x64/4ed50f1d5593e45a452efbf456aa1db4
 */
public final class Xml {

    private Xml() {}

    // Every escapable character has a code below 64, so a set of them fits into the bits of one long.

    /** Characters that are unsafe in both tag bodies and attribute values. */
    private static final long MARKUP_ESCAPE = 1L << '&' | 1L << '<';

    /** Tag body: markup characters, plus '>' which is replaced only when it terminates "]]>". */
    private static final long TEXT_ESCAPE = MARKUP_ESCAPE | 1L << '>';

    private static final long DOUBLE_QUOTED_ATTR_ESCAPE = MARKUP_ESCAPE | 1L << '"';

    private static final long SINGLE_QUOTED_ATTR_ESCAPE = MARKUP_ESCAPE | 1L << '\'';

    /**
     * Appends {@code content} escaped for use as element text.
     *
     * <p>{@code &} and {@code <} are always replaced. {@code >} is replaced with {@code &gt;}
     * only when it directly follows {@code ]]} within {@code content}, because the literal
     * sequence {@code ]]>} is not allowed in XML character data. A {@code ]]>} that is split
     * across two separate calls is not detected.
     *
     * @param appendable destination
     * @param content    raw text to escape
     * @param <A>        concrete destination type, returned to allow call chaining
     * @return {@code appendable}
     * @throws UncheckedIOException if {@code appendable} throws {@link IOException}
     */
    public static <A extends Appendable> A appendEscapedForTagBody(A appendable, CharSequence content) {
        appendEscaped(appendable, content, TEXT_ESCAPE);
        return appendable;
    }

    /**
     * Appends {@code content} escaped for use inside a double-quoted attribute value.
     * Replaces {@code &}, {@code <} and {@code "}; the surrounding quotes are not written.
     *
     * @param appendable destination
     * @param content    raw attribute value to escape
     * @param <A>        concrete destination type, returned to allow call chaining
     * @return {@code appendable}
     * @throws UncheckedIOException if {@code appendable} throws {@link IOException}
     */
    public static <A extends Appendable> A appendEscapedForDoubleQuotedAttrValue(A appendable, CharSequence content) {
        appendEscaped(appendable, content, DOUBLE_QUOTED_ATTR_ESCAPE);
        return appendable;
    }

    /**
     * Appends {@code content} escaped for use inside a single-quoted attribute value.
     * Replaces {@code &}, {@code <} and {@code '}; the surrounding quotes are not written.
     *
     * @param appendable destination
     * @param content    raw attribute value to escape
     * @param <A>        concrete destination type, returned to allow call chaining
     * @return {@code appendable}
     * @throws UncheckedIOException if {@code appendable} throws {@link IOException}
     */
    public static <A extends Appendable> A appendEscapedForSingleQuotedAttrValue(A appendable, CharSequence content) {
        appendEscaped(appendable, content, SINGLE_QUOTED_ATTR_ESCAPE);
        return appendable;
    }

    /**
     * Copies {@code content} to {@code builder}, replacing every character whose bit is set in
     * {@code escapes}. Text between replaced characters is appended as one slice per run.
     */
    private static void appendEscaped(Appendable builder, CharSequence content, long escapes) {
        try {
            int startIdx = 0;
            final int len = content.length();
            for (int i = 0; i < len; i++) {
                char c = content.charAt(i);
                // Java uses only the 6 lowest bits of the shift distance for long shifts
                // (e.g. 1L << 102 equals 1L << 38), so bigger characters must be rejected
                // explicitly before the mask is consulted.
                if (c >= 64 || ((1L << c) & escapes) == 0) {
                    continue;
                }
                // '>' is harmless on its own; it only needs escaping as the end of "]]>".
                if (c == '>' && !endsCdataTerminator(content, i)) {
                    continue;
                }
                builder.append(content, startIdx, i /* exclusive */).append(replacementFor(c));
                startIdx = i + 1;
            }
            builder.append(content, startIdx, len);
        } catch (IOException e) {
            // typically, our Appendable is StringBuilder which does not throw; also,
            // there's no way to declare 'if A#append() throws E, then appendEscaped() throws E, too'
            throw new UncheckedIOException(e);
        }
    }

    /** Tells whether the character at {@code index} is directly preceded by "]]". */
    private static boolean endsCdataTerminator(CharSequence content, int index) {
        return index >= 2 && content.charAt(index - 1) == ']' && content.charAt(index - 2) == ']';
    }

    /** Returns the reference that replaces the escapable character {@code c}. */
    private static String replacementFor(char c) {
        switch (c) {
            // 'quot' and 'apos' are 1 char longer than '#34' and '#39', so the numeric forms are used
            case '"':
                return "&#34;";
            case '&':
                return "&amp;";
            case '\'':
                return "&#39;";
            case '<':
                return "&lt;";
            case '>':
                return "&gt;";
            default:
                throw new AssertionError("no replacement defined for character code " + (int) c);
        }
    }

    /** Self-check: escapes a small HTML document, verifies the result and prints it. */
    public static void main(String[] args) {
        StringBuilder sb =
                new StringBuilder("<!DOCTYPE html>\n<html lang=\"en\">\n<head><title>Test</title></head>\n<body>\n\n");
        appendEscapedForDoubleQuotedAttrValue(sb.append("<p title=\""), "<\"I'm double-quoted!\">").append("\">");
        appendEscapedForTagBody(sb, "<\"Hello!\">").append("</p>\n");
        appendEscapedForSingleQuotedAttrValue(sb.append("<p title='"), "<\"I'm single-quoted!\">").append("'>");
        appendEscapedForTagBody(sb, "<\"Goodbye!\">").append("</p>\n\n</body>\n</html>");
        String escaped = sb.toString();
        String expected = "<!DOCTYPE html>\n<html lang=\"en\">\n<head><title>Test</title></head>\n<body>\n\n" +
                "<p title=\"&lt;&#34;I'm double-quoted!&#34;>\">&lt;\"Hello!\"></p>\n" +
                "<p title='&lt;\"I&#39;m single-quoted!\">'>&lt;\"Goodbye!\"></p>\n" +
                "\n</body>\n</html>";
        if (!expected.equals(escaped)) {
            throw new AssertionError("expected:<" + expected + "> but was:<" + escaped + ">");
        }
        String cdataEnd = appendEscapedForTagBody(new StringBuilder(), "a]]>b").toString();
        if (!"a]]&gt;b".equals(cdataEnd)) {
            throw new AssertionError("expected:<a]]&gt;b> but was:<" + cdataEnd + ">");
        }
        System.out.println(escaped);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment