Skip to content

Instantly share code, notes, and snippets.

@dnault
Created July 14, 2023 23:32
Show Gist options
  • Save dnault/1520b0f4612c73e7b080f63f9ab4f900 to your computer and use it in GitHub Desktop.
Save dnault/1520b0f4612c73e7b080f63f9ab4f900 to your computer and use it in GitHub Desktop.
Quick way to remove top-level fields from a byte array containing a JSON object.
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.json.JsonMapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Blanks out selected top-level fields of a JSON object stored in a byte array.
 *
 * <p>The array is modified in place: each erased field (its name, its value, and
 * one adjacent comma) is overwritten with space characters, so the array keeps
 * its length and the remaining bytes keep their positions.
 */
public class JsonFieldEraser {
    private static final JsonFactory jsonFactory = new JsonMapper().getFactory();

    private JsonFieldEraser() {
        throw new AssertionError("not instantiable");
    }

    /**
     * Example usage.
     */
    public static void main(String[] args) throws JsonProcessingException {
        Map<String, Object> map = new LinkedHashMap<>();
        map.put("meta1", 1);
        map.put("meta2", Collections.emptyMap());
        map.put("meta3", Collections.emptyList());
        map.put("data1", Collections.singletonMap("color", "red"));
        map.put("meta4", true);
        map.put("data2", Collections.singletonList("xyzzy"));
        map.put("meta5", "foo");

        String json = new JsonMapper().writeValueAsString(map);
        System.out.println(json);

        byte[] bytes = json.getBytes(UTF_8);
        erase(bytes, Set.of("meta1", "meta2", "meta3", "meta4", "meta5"));
        System.out.println(new String(bytes, UTF_8));
    }

    /**
     * Replaces the given fields with whitespace. Returns the set of erased fields.
     *
     * <p>Only fields that are direct children of the root object are considered;
     * fields with the same name nested inside other objects are left alone.
     *
     * <p>Parsing stops as soon as every requested field has been found, so bytes
     * following the last erased field are not parsed or validated in that case.
     * If the object contains the same field name more than once, only one
     * occurrence is tracked per name, so not every occurrence is guaranteed to
     * be erased.
     *
     * @param jsonObject bytes of a JSON object; modified in place
     * @param topLevelFieldsToErase names of the top-level fields to blank out
     * @return the names of the fields that were found and erased
     * @throws IllegalArgumentException if the bytes do not start with a JSON object,
     *     if invalid JSON is encountered while parsing, if content follows the root
     *     object, or if the parser cannot report usable byte offsets for a field
     */
    public static Set<String> erase(byte[] jsonObject, Set<String> topLevelFieldsToErase) {
        Map<String, ByteRange> fieldNameToRange = new LinkedHashMap<>();

        try (JsonParser parser = jsonFactory.createParser(jsonObject)) {
            if (parser.nextToken() != JsonToken.START_OBJECT) {
                throw new IllegalArgumentException("Byte array does not contain JSON object");
            }

            // Object nesting depth; 1 means "directly inside the root object".
            // Arrays are not counted because field names only occur inside objects.
            int depth = 1;
            JsonToken token;
            while ((token = parser.nextToken()) != null) {
                if (depth == 1 && token == JsonToken.FIELD_NAME) {
                    String fieldName = parser.currentName();
                    if (topLevelFieldsToErase.contains(fieldName)) {
                        // The range runs from the start of the field name to just past its value.
                        long start = parser.currentTokenLocation().getByteOffset();
                        skipToEndOfFieldValue(parser);
                        long pastEnd = parser.currentLocation().getByteOffset();
                        fieldNameToRange.put(fieldName, checkedRange(jsonObject, start, pastEnd));

                        if (fieldNameToRange.size() == topLevelFieldsToErase.size()) {
                            // found all the fields; don't need to keep parsing
                            break;
                        }
                    }
                } else if (token == JsonToken.START_OBJECT) {
                    depth++;
                } else if (token == JsonToken.END_OBJECT) {
                    depth--;
                    if (depth == 0 && parser.nextToken() != null) {
                        // multiple JSON roots, or trailing garbage
                        throw new IllegalArgumentException("Byte array does has multiple document roots, or trailing garbage");
                    }
                }
            }

            // Expand and blank each range one at a time: expanding a later range scans
            // backwards over spaces written by an earlier one, which lets it reach (and
            // remove) a comma that would otherwise be left dangling.
            for (ByteRange range : fieldNameToRange.values()) {
                range.swallowOneComma();
                range.fill((byte) ' ');
            }

            return fieldNameToRange.keySet();

        } catch (IOException e) {
            throw new IllegalArgumentException("Byte array does not contain valid JSON", e);
        }
    }

    /**
     * Builds a range after verifying the parser-reported offsets are usable as
     * indexes into {@code bytes}. A field name inside an object is always preceded
     * by at least the opening brace, hence the lower bound of 1. Negative offsets
     * mean the parser could not report byte positions.
     */
    private static ByteRange checkedRange(byte[] bytes, long start, long pastEnd) {
        if (start < 1 || pastEnd < start || pastEnd > bytes.length) {
            throw new IllegalArgumentException(
                    "Unable to locate field within byte array (is the JSON UTF-8 encoded?)");
        }
        return new ByteRange(bytes, (int) start, (int) pastEnd);
    }

    /**
     * Advances the parser from a field name to the end of that field's value,
     * consuming the whole nested structure if the value is an object or array.
     */
    private static void skipToEndOfFieldValue(JsonParser parser) throws IOException {
        JsonToken valueToken = parser.nextToken();
        if (valueToken.isStructStart()) {
            finishStruct(parser);
        } else {
            // Make sure a scalar token is fully read so the current location is past it.
            parser.finishToken();
        }
    }

    /**
     * Consumes tokens until the object or array whose start token was just read
     * has been closed.
     *
     * @throws IllegalArgumentException if the tokens run out before the structure closes
     */
    private static void finishStruct(JsonParser parser) throws IOException {
        int depth = 1;
        JsonToken token;
        while ((token = parser.nextToken()) != null) {
            if (token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY) {
                depth++;
            } else if (token == JsonToken.END_OBJECT || token == JsonToken.END_ARRAY) {
                depth--;
                if (depth == 0) {
                    return;
                }
            }
        }
        throw new IllegalArgumentException("Unexpected end of JSON");
    }

    /**
     * A half-open span {@code [startOffset, pastEndOffset)} of a byte array that
     * can be widened to take in neighbouring whitespace and one comma, and then
     * overwritten.
     */
    private static class ByteRange {
        private final byte[] bytes;
        private int startOffset;
        private int pastEndOffset;

        private ByteRange(byte[] bytes, int startOffset, int pastEndOffset) {
            this.bytes = bytes;
            this.startOffset = startOffset;
            this.pastEndOffset = pastEndOffset;
        }

        @Override
        public String toString() {
            return "[" + startOffset + "," + pastEndOffset + ") = |" + new String(bytes, startOffset, pastEndOffset - startOffset, UTF_8) + "|";
        }

        /** Overwrites every byte in the range with {@code fillByte}. */
        void fill(byte fillByte) {
            Arrays.fill(bytes, startOffset, pastEndOffset, fillByte);
        }

        /**
         * Widens the range over surrounding whitespace and then over a single
         * adjacent comma: the one after the range if there is one, otherwise
         * the one before it. Never reads outside the array.
         */
        private void swallowOneComma() {
            swallowWhitespace();
            if (pastEndOffset < bytes.length && bytes[pastEndOffset] == ',') {
                pastEndOffset++;
            } else if (startOffset > 0 && bytes[startOffset - 1] == ',') {
                startOffset--;
            }
        }

        /**
         * Widens the range in both directions over JSON whitespace, stopping at
         * the array boundaries.
         */
        private void swallowWhitespace() {
            while (startOffset > 0 && isJsonWhitespace(bytes[startOffset - 1])) {
                startOffset--;
            }
            while (pastEndOffset < bytes.length && isJsonWhitespace(bytes[pastEndOffset])) {
                pastEndOffset++;
            }
        }

        /** Returns true for space, horizontal tab, line feed and carriage return. */
        private static boolean isJsonWhitespace(byte b) {
            switch (b) {
                case 0x20: // Space
                case 0x09: // Horizontal tab
                case 0x0A: // LF
                case 0x0D: // CR
                    return true;
                default:
                    return false;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment