Skip to content

Instantly share code, notes, and snippets.

@libetl
Last active February 28, 2024 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save libetl/2dd91c987e72dde840ef57cbcfdf0d81 to your computer and use it in GitHub Desktop.
Save libetl/2dd91c987e72dde840ef57cbcfdf0d81 to your computer and use it in GitHub Desktop.
XmlToMap (Java + Kotlin)
package xmltomap;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-driven converter from an XML string to nested {@link Map}s.
 *
 * <p>The input is consumed from left to right: at every step the start of the remaining text is
 * matched against a CDATA section, a self-closing tag, a closing tag or an opening tag, and
 * anything else is treated as text up to the next {@code '<'}. No XML parser is involved and no
 * validation is performed, so the input is expected to be well-formed; unbalanced tags or text
 * outside of any element surface as runtime exceptions from the string/deque operations.
 *
 * <p>Shape of the result, as produced by the code below:
 * <ul>
 *   <li>an element that only contains text becomes a {@code String} entry in its parent;</li>
 *   <li>a self-closing element becomes a map holding its attributes;</li>
 *   <li>repeated sibling elements are grouped as
 *       {@code {"arrayOfType": name, "children": [ ... ]}};</li>
 *   <li>text mixed with child elements, and CDATA sections, are stored under the key
 *       {@code "CDATA"} (a {@code String}, or a {@code List} once several text runs exist).</li>
 * </ul>
 */
@SuppressWarnings("unchecked")
public final class XmlToMap {

    /** CDATA section at the start of the remaining input; group 1 is its (non-empty, single-line) content. */
    private static final Pattern CDATA = Pattern.compile("^<!\\[CDATA\\[(.+?)\\]\\]>");
    /** Opening tag at the start of the remaining input; group 1 is the qualified tag name. */
    private static final Pattern OPENING_TAG = Pattern.compile("^<([^\\s>/]+)[^>]*>");
    /** Self-closing tag; must be tried before {@link #OPENING_TAG}, which also matches it. */
    private static final Pattern OPENING_CLOSING_TAG = Pattern.compile("^<([^\\s>/]+)[^>]*/>");
    /** Closing tag at the start of the remaining input. */
    private static final Pattern CLOSING_TAG = Pattern.compile("^</([^\\s>]+)>");
    /** One double-quoted attribute; group 1 is the (optionally prefixed) name, group 2 the value. */
    private static final Pattern ATTRS = Pattern.compile("([a-zA-Z][a-zA-Z-0-9]*(?::[a-zA-Z][a-zA-Z-0-9]*)?)=\"([^\"]*)\"");
    /** Names made only of A-Z are lower-cased entirely by {@link #toCamelCase(String)}. */
    private static final Pattern UPPERCASE = Pattern.compile("^[A-Z]+$");
    /** Leading {@code prefix:} of a qualified tag name; compiled once instead of once per tag. */
    private static final Pattern NAMESPACE_PREFIX = Pattern.compile("^[a-zA-Z](?:[a-zA-Z0-9_-])*:");
    /**
     * Locale used for case folding. ROOT keeps the generated keys independent of the JVM's
     * default locale (for instance, a Turkish default locale lower-cases 'I' to a dotless i).
     */
    private static final Locale locale = Locale.ROOT;

    /**
     * Conversion switches. The fluent methods mutate the instance they are called on and return
     * it, so calling them on {@link #DEFAULT} changes the shared default for every later caller;
     * build a fresh {@code new Options()} instead.
     */
    public static class Options {
        boolean ignoreNameSpace = false;
        boolean prefixAttributesWithAttr = false;
        Predicate<List<String>> shouldEnforceArray = (list) -> false;
        public static final Options DEFAULT = new Options();

        /**
         * Strips namespace prefixes from tag and attribute names and drops {@code xmlns}
         * declarations as well as attributes whose prefix was declared through {@code xmlns:}.
         *
         * @return this instance
         */
        public Options ignoreNameSpace() {
            this.ignoreNameSpace = true;
            return this;
        }

        /**
         * Stores attributes under {@code "attr:" + name} instead of the bare name.
         *
         * @return this instance
         */
        public Options prefixAttributesWithAttr() {
            this.prefixAttributesWithAttr = true;
            return this;
        }

        /**
         * Installs a predicate that forces an element to be stored as an array element. It is
         * evaluated for every opening tag that has at least one open ancestor, and receives a
         * copy of the breadcrumb of those ancestors (the new tag itself is not included; entries
         * may carry the internal {@code "(array)"} suffix or be the marker {@code "children"}).
         *
         * @param isTrue predicate over the current breadcrumb
         * @return this instance
         */
        public Options shouldEnforceArrayWhen(Predicate<List<String>> isTrue) {
            this.shouldEnforceArray = isTrue;
            return this;
        }
    }

    /**
     * Lower-cases the whole name when it consists only of the letters A-Z, otherwise lower-cases
     * just its first character.
     */
    private static String toCamelCase(String fieldName) {
        if (UPPERCASE.matcher(fieldName).matches()) {
            return fieldName.toLowerCase(locale);
        }
        return fieldName.substring(0, 1).toLowerCase(locale) + fieldName.substring(1);
    }

    /** Turns a qualified tag name into a map key, optionally dropping its namespace prefix. */
    private static String localNameOf(String qualifiedName, boolean ignoreNameSpace) {
        String name = ignoreNameSpace ? NAMESPACE_PREFIX.matcher(qualifiedName).replaceAll("") : qualifiedName;
        return toCamelCase(name);
    }

    /**
     * Copies every double-quoted attribute found in {@code text} into {@code node}.
     *
     * @param text                     full text of the tag, including the angle brackets
     * @param node                     receiver of the attributes; nothing happens unless it is a map
     * @param prefixAttributesWithAttr whether keys get the {@code "attr:"} prefix
     * @param ignoreNamespaces         whether namespace declarations/prefixes are dropped
     * @param namespaces               prefixes seen so far; extended here when an
     *                                 {@code xmlns:prefix} declaration is met and namespaces are ignored
     */
    private static void attrSetFunc(String text, Object node, boolean prefixAttributesWithAttr, boolean ignoreNamespaces, List<String> namespaces) {
        if (!(node instanceof Map<?, ?>)) {
            return;
        }
        Map<String, Object> target = (Map<String, Object>) node;
        Matcher attribute = ATTRS.matcher(text);
        while (attribute.find()) {
            String qualifiedName = attribute.group(1);
            String[] parts = qualifiedName.split(":");
            boolean prefixed = parts.length == 2;
            if (ignoreNamespaces) {
                if (prefixed && parts[0].equals("xmlns")) {
                    // Remember the declared prefix so attributes using it are skipped later on.
                    namespaces.add(parts[1]);
                }
                if (prefixed && namespaces.contains(parts[0])) {
                    // Either an xmlns:* declaration itself or an attribute in a declared namespace.
                    continue;
                }
                if (qualifiedName.equals("xmlns")) {
                    continue;
                }
            }
            String attributeName = prefixed && ignoreNamespaces ? parts[1] : qualifiedName;
            target.put((prefixAttributesWithAttr ? "attr:" : "") + toCamelCase(attributeName), attribute.group(2));
        }
    }

    /**
     * Tells whether the two maps differ in size or in any value of {@code attrsNode1}.
     *
     * <p>NOTE: values are compared by identity ({@code !=}), not with {@code equals}. The array
     * detection in {@link #xmlToMap(String, Options)} relies on the current outcome, so this is
     * kept as is on purpose.
     */
    private static boolean areDifferent(Map<String, Object> attrsNode1, Map<String, Object> toBeCompared) {
        return attrsNode1.size() != toBeCompared.size() || attrsNode1.entrySet().stream().anyMatch(it -> it.getValue() != toBeCompared.get(it.getKey()));
    }

    /**
     * Decides whether an opening tag named {@code localName} repeats an entry already present in
     * {@code values} and must therefore be stored as an array element.
     *
     * @return {@code false} when {@code values} is a map without a {@code localName} entry;
     *         otherwise the result of {@link #areDifferent(Map, Map)} against the map (or against
     *         any element when {@code values} is a list)
     */
    private static boolean foundNodeHavingDifferentAttributes(String localName, Map<String, Object> attrsNode, Object values) {
        if (values instanceof Map<?, ?> && ((Map<String, Object>) values).get(localName) == null) {
            return false;
        }
        if (values instanceof List<?>) {
            return ((List<?>) values).stream().anyMatch(it -> areDifferent(attrsNode, (Map<String, Object>) it));
        }
        if (values instanceof Map<?, ?>) {
            return areDifferent(attrsNode, (Map<String, Object>) values);
        }
        return false;
    }

    /** True for a node of the exact form {@code {"arrayOfType": ..., "children": List}}. */
    private static final Predicate<Map<String, Object>> alreadyArrayTyped = node -> node.keySet().size() == 2 && node.keySet().containsAll(Arrays.asList("arrayOfType", "children")) && node.get("children") instanceof List<?>;

    /**
     * Merges attributes into a node: appends to an existing list, sets a missing key, and
     * otherwise replaces the existing value by a new list that contains only the attribute value.
     */
    private static final BiConsumer<Map<String, Object>, Map<String, Object>> saveAttributes = (attrsNode, intermediaryNode) -> attrsNode.forEach((key, value) -> {
        if (intermediaryNode.get(key) instanceof List<?>) {
            ((List<Map<String, Object>>) intermediaryNode.get(key)).add((Map<String, Object>) value);
            return;
        }
        if (intermediaryNode.get(key) == null) {
            intermediaryNode.put(key, value);
            return;
        }
        intermediaryNode.put(key, new ArrayList<>());
        ((List<Object>) intermediaryNode.get(key)).add(value);
    });

    /**
     * Converts {@code body} using {@link Options#DEFAULT}.
     *
     * @param body XML text
     * @return the root map
     */
    public static Map<String, Object> xmlToMap(String body) {
        return xmlToMap(body, Options.DEFAULT);
    }

    /**
     * Converts {@code body} into nested maps (see the class description for the result shape).
     *
     * @param body    XML text; it is trimmed and whitespace between {@code >} and {@code <} is removed
     * @param options conversion switches
     * @return the root map; empty when {@code body} is blank
     * @throws RuntimeException (such as {@link java.util.NoSuchElementException} or
     *         {@link StringIndexOutOfBoundsException}) when the input is not well-formed
     */
    public static Map<String, Object> xmlToMap(String body, Options options) {
        Map<String, Object> root = new HashMap<>();
        // deque holds the nodes to return to on closing tags; breadcrumb holds, in parallel, the
        // element names (suffixed with "(array)" for array elements, or the marker "children").
        Deque<Map<String, Object>> deque = new ArrayDeque<>();
        Deque<String> breadcrumb = new ArrayDeque<>();
        String currentLocalName = "";
        String substring = body.trim().replaceAll(">\\s+<", "><");
        List<String> namespaces = new ArrayList<>(Collections.singletonList("xmlns"));
        Map<String, Object> currentNode = root;
        while (!substring.isEmpty()) {
            Matcher cdataMatcher = CDATA.matcher(substring);
            Matcher openingClosingTagMatcher = OPENING_CLOSING_TAG.matcher(substring);
            Matcher closingTagMatcher = CLOSING_TAG.matcher(substring);
            Matcher openingTagMatcher = OPENING_TAG.matcher(substring);
            if (cdataMatcher.find()) {
                currentNode.put("CDATA", cdataMatcher.group(1));
                substring = substring.substring(cdataMatcher.end(0));
            } else if (openingClosingTagMatcher.find()) {
                // Self-closing element: its value is simply the map of its attributes.
                String localName = localNameOf(openingClosingTagMatcher.group(1), options.ignoreNameSpace);
                Map<String, Object> attrs = new HashMap<>();
                attrSetFunc(openingClosingTagMatcher.group(0), attrs, options.prefixAttributesWithAttr, options.ignoreNameSpace, namespaces);
                currentNode.put(localName, attrs);
                substring = substring.substring(openingClosingTagMatcher.end(0));
            } else if (closingTagMatcher.find()) {
                if (alreadyArrayTyped.test(deque.getLast())) {
                    // The element being closed is the array container itself: unwind its frames.
                    if (breadcrumb.getLast().equals("children")) { // end of inlined array
                        breadcrumb.removeLast();
                        deque.removeLast();
                    }
                    breadcrumb.removeLast();
                    deque.removeLast();
                }
                if (breadcrumb.getLast().endsWith("(array)")) {
                    // Leaving an array element: drop it and go back to the frame below.
                    deque.removeLast();
                    currentNode = deque.getLast();
                } else {
                    currentNode = deque.removeLast();
                }
                breadcrumb.removeLast();
                substring = substring.substring(closingTagMatcher.end(0));
            } else if (openingTagMatcher.find()) {
                Map<String, Object> attrs = new HashMap<>();
                attrSetFunc(openingTagMatcher.group(0), attrs, options.prefixAttributesWithAttr, options.ignoreNameSpace, namespaces);
                String localName = localNameOf(openingTagMatcher.group(1), options.ignoreNameSpace);
                currentLocalName = localName;
                if (alreadyArrayTyped.test(currentNode) && !currentNode.get("arrayOfType").equals(currentLocalName)) {
                    // A differently named sibling ends the current array: step out of it first.
                    if ("children".equals(breadcrumb.getLast())) {
                        deque.removeLast();
                        breadcrumb.removeLast();
                    }
                    currentNode = deque.removeLast();
                    breadcrumb.removeLast();
                }
                if (alreadyArrayTyped.test(currentNode)) {
                    // One more element of an array that already exists.
                    Map<String, Object> intermediaryNode = new HashMap<>();
                    ((List<Map<String, Object>>) currentNode.get("children")).add(intermediaryNode);
                    saveAttributes.accept(attrs, intermediaryNode);
                    deque.add(intermediaryNode);
                    breadcrumb.add(localName + "(array)");
                    currentNode = intermediaryNode;
                } else if ((!breadcrumb.isEmpty() && options.shouldEnforceArray.test(new ArrayList<>(breadcrumb))) || foundNodeHavingDifferentAttributes(localName, attrs, currentNode)) {
                    // Either the caller forces an array here, or the name repeats an existing entry.
                    Map<String, Object> cloned = new HashMap<>();
                    Object currentValue = currentNode.get(currentLocalName);
                    // "Inlined": the array lives under its own key next to other entries, instead
                    // of the enclosing node itself being turned into the array container.
                    boolean inlined = currentNode.size() > 2 || (currentNode.size() == 1 && !currentLocalName.equals(currentNode.keySet().iterator().next()));
                    boolean thereIsAlreadyAList = currentValue instanceof Map<?, ?> && alreadyArrayTyped.test((Map<String, Object>) currentValue);
                    List<Object> children;
                    if (thereIsAlreadyAList) {
                        children = (List<Object>) ((Map<String, Object>) currentValue).get("children");
                    } else {
                        children = new ArrayList<>();
                        // The previously stored single value becomes the first array element.
                        if (currentValue instanceof Map<?, ?>) {
                            children.add(currentValue);
                        }
                        if (currentValue instanceof String) {
                            children.add(new HashMap<>(Collections.singletonMap(currentLocalName, currentValue)));
                        }
                        cloned.put("arrayOfType", currentLocalName);
                        cloned.put("children", children);
                        if (inlined) {
                            deque.add(currentNode);
                            breadcrumb.add(currentLocalName);
                            deque.add(cloned);
                            breadcrumb.add("children");
                            currentNode.put(currentLocalName, cloned);
                        } else {
                            deque.add(currentNode);
                            breadcrumb.add(currentLocalName);
                            currentNode.putAll(cloned);
                            currentNode.remove(currentLocalName);
                            if (currentValue instanceof Map<?, ?>) {
                                // Everything else stored on the container moves to the first element.
                                for (String key : new HashSet<>(currentNode.keySet())) {
                                    if (!"children".equals(key) && !"arrayOfType".equals(key)) {
                                        ((Map<String, Object>) currentValue).put(key, currentNode.get(key));
                                        currentNode.remove(key);
                                    }
                                }
                            }
                        }
                    }
                    Map<String, Object> intermediaryNode = new HashMap<>();
                    children.add(intermediaryNode);
                    saveAttributes.accept(attrs, intermediaryNode);
                    deque.add(intermediaryNode);
                    breadcrumb.add(localName + "(array)");
                } else {
                    // Plain child element: its attributes are stored on the enclosing node.
                    saveAttributes.accept(attrs, currentNode);
                    deque.add(currentNode);
                    breadcrumb.add(localName);
                }
                boolean arrayElement = breadcrumb.getLast().endsWith("(array)");
                Map<String, Object> newValue = arrayElement ? deque.getLast() : new HashMap<>();
                if (!arrayElement) {
                    currentNode.put(localName, newValue);
                }
                currentNode = newValue;
                substring = substring.substring(openingTagMatcher.end(0));
            } else {
                // Text run: everything up to the next tag.
                int nextTag = substring.indexOf('<');
                String data = substring.substring(0, nextTag);
                Map<String, Object> parentNode = deque.getLast();
                if (breadcrumb.getLast().endsWith("(array)")) {
                    currentNode.put(currentLocalName, data);
                } else if (parentNode.containsKey(currentLocalName)
                        && !(parentNode.get(currentLocalName) instanceof String)
                        && currentNode.isEmpty()) {
                    // Text-only element so far: replace its (empty) map by the text itself.
                    parentNode.put(currentLocalName, data);
                } else {
                    // Mixed content: accumulate the text runs under "CDATA".
                    Object existing = currentNode.get("CDATA");
                    if (existing instanceof String) {
                        currentNode.put("CDATA", new ArrayList<>(Arrays.asList(existing, data)));
                    } else if (existing instanceof List) {
                        ((List<Object>) existing).add(data);
                    } else {
                        currentNode.put("CDATA", data);
                    }
                }
                // Advance past the text only in this branch: the tag branches have already
                // consumed their match, and once the last tag is consumed there is no '<' left.
                substring = substring.substring(nextTag);
            }
        }
        return root;
    }
}
package xmltomap
/**
 * Regex-driven converter from an XML string to nested maps.
 *
 * The input is consumed from left to right: at every step the start of the remaining text is
 * matched against a CDATA section, a self-closing tag, a closing tag or an opening tag, and
 * anything else is treated as text up to the next `<`. No XML parser is involved and no
 * validation is performed, so the input is expected to be well-formed.
 *
 * While parsing, every attribute is stored twice (as `name` and as `_attr_name`); once parsing
 * is done [cleanAttributesPrefixes] keeps only the variant selected by
 * [Options.prefixAttributesWithAttr].
 */
@Suppress("ComplexMethod", "LongMethod", "NestedBlockDepth", "UncheckedCast")
object XmlToMap {
    // CDATA section at the start of the remaining input; group 1 is its content.
    private val CDATA = Regex("^<!\\[CDATA\\[(.+?)\\]\\]>")
    // Opening tag; group 1 is the qualified tag name. Also matches self-closing tags, which is
    // why OPENING_CLOSING_TAG is tried first.
    private val OPENING_TAG = Regex("^<([^\\s>/]+)[^>]*>")
    private val OPENING_CLOSING_TAG = Regex("^<([^\\s>/]+)[^>]*/>")
    private val CLOSING_TAG = Regex("^</([^\\s>]+)>")
    // One double-quoted attribute; group 1 is the (optionally prefixed) name, group 2 the value.
    private val ATTRS = Regex("([a-zA-Z][a-zA-Z-0-9]*(?::[a-zA-Z][a-zA-Z-0-9]*)?)=\"([^\"]*)\"")
    private val UPPERCASE = Regex("^[A-Z]+$")
    // ROOT keeps the generated keys independent of the JVM's default locale (for instance, a
    // Turkish default locale lower-cases 'I' to a dotless i).
    private val locale = java.util.Locale.ROOT

    /**
     * Lower-cases the whole name when it consists only of the letters A-Z, otherwise lower-cases
     * just its first character.
     */
    private fun toCamelCase(fieldName: String): String {
        return if (UPPERCASE.matches(fieldName)) {
            fieldName.lowercase(locale)
        } else fieldName.substring(0, 1).lowercase(locale) + fieldName.substring(1)
    }

    /**
     * Copies every double-quoted attribute found in [text] into [node], once under its name and
     * once under `_attr_` + name. Nothing happens unless [node] is a mutable map.
     *
     * When [ignoreNamespaces] is set, `xmlns` declarations and attributes whose prefix is listed
     * in [namespaces] are skipped, and prefixes declared through `xmlns:prefix` are added to
     * [namespaces].
     */
    private fun attrSetFunc(
        text: String, node: Any, ignoreNamespaces: Boolean, namespaces: MutableList<String>
    ) {
        ATTRS.findAll(text).forEach { it: MatchResult ->
            if (node !is MutableMap<*, *>) return@forEach
            val node1 = node as MutableMap<String, Any?>
            val attributeNameQualification = (it.groups[1]?.value ?: "").split(":".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray()
            if (attributeNameQualification.size == 2 && attributeNameQualification[0] == "xmlns" && ignoreNamespaces) {
                // Remember the declared prefix so attributes using it are skipped later on.
                namespaces.add(
                    attributeNameQualification[1]
                )
            }
            if (attributeNameQualification.size == 2 && namespaces.any { namespace: String ->
                    namespace == attributeNameQualification[0]
                } && ignoreNamespaces) return@forEach
            if (it.groups[1]?.value == "xmlns" && ignoreNamespaces) {
                return@forEach
            }
            val attributeName = if (attributeNameQualification.size == 2 && ignoreNamespaces) attributeNameQualification[1] else it.groups[1]?.value ?: ""
            // Both variants are written; cleanAttributesPrefixes drops one of them at the end.
            node1["_attr_" + toCamelCase(attributeName)] = it.groups[2]?.value
            node1[toCamelCase(attributeName)] = it.groups[2]?.value
        }
    }

    /**
     * Tells whether the two maps differ in size or in any value of [attrsNode1].
     *
     * NOTE: values are compared by identity (`!==`), not by equality; the array detection in
     * [xmlToMap] relies on the current outcome, so this is kept as is on purpose.
     */
    private fun areDifferent(
        attrsNode1: Map<String, Any?>, toBeCompared: Map<String, Any?>
    ): Boolean {
        return attrsNode1.size != toBeCompared.size || attrsNode1.entries.any { (key, value) ->
            value !== toBeCompared[key]
        }
    }

    /**
     * Decides whether an opening tag named [localName] repeats an entry already present in
     * [values] and must therefore be stored as an array element. Returns `false` when [values]
     * is a map without a [localName] entry.
     */
    private fun foundNodeHavingDifferentAttributes(
        localName: String, attrsNode: Map<String, Any?>, values: Any
    ): Boolean {
        if (values is Map<*, *> && (values as Map<String?, Any?>)[localName] == null) {
            return false
        }
        if (values is List<*>) {
            return values.any {
                it is Map<*, *> && areDifferent(
                    attrsNode, it as Map<String, Any?>
                )
            }
        }
        return if (values is Map<*, *>) {
            areDifferent(
                attrsNode, values as Map<String, Any?>
            )
        } else false
    }

    // True for a node of the exact form {"arrayOfType": ..., "children": List}.
    private val alreadyArrayTyped: (Map<String, Any?>) -> Boolean = { node ->
        node.keys.size == 2 && node.keys.containsAll(
            mutableListOf(
                "arrayOfType", "children"
            )
        ) && node["children"] is List<*>
    }

    // Merges attributes into a node: appends to an existing list, sets a missing key, and
    // otherwise replaces the existing value by a new list containing only the attribute value.
    private val saveAttributes: (Map<String, Any?>, MutableMap<String, Any?>) -> Unit = { attrsNode: Map<String, Any?>, intermediaryNode: MutableMap<String, Any?> ->
        attrsNode.forEach { (key: String, value: Any?) ->
            if (intermediaryNode[key] is List<*>) {
                (intermediaryNode[key] as MutableList<Map<String?, Any?>?>?)!!.add(
                    value as Map<String?, Any?>
                )
                return@forEach
            }
            if (intermediaryNode[key] == null) {
                intermediaryNode[key] = value
                return@forEach
            }
            intermediaryNode[key] = mutableListOf<Any>()
            (intermediaryNode[key] as MutableList<Any?>?)!!.add(
                value
            )
        }
    }

    /**
     * Recursively keeps exactly one of the two attribute variants written by [attrSetFunc]:
     * the `_attr_name` key when [prefixAttributesWithAttr] is set, the bare `name` key otherwise.
     *
     * [seenNodes] is extended on every recursion step but is not consulted by this function.
     */
    private fun cleanAttributesPrefixes(
        node: MutableMap<String, Any?>,
        prefixAttributesWithAttr: Boolean,
        seenNodes: Set<MutableMap<String, Any?>> = setOf()
    ) {
        node.keys.filter { it.startsWith("_attr_") }.forEach { k ->
            if (prefixAttributesWithAttr)
                // k already carries the prefix, so the twin to drop is k without it.
                node.remove(k.substring("_attr_".length))
            else node.remove(k)
        }
        (node.values.filterIsInstance<MutableMap<*, *>>() +
                node.values.filterIsInstance<List<*>>().flatMap { it.filterIsInstance<MutableMap<*, *>>() })
            .forEach {
                cleanAttributesPrefixes(it as MutableMap<String, Any?>, prefixAttributesWithAttr, seenNodes + node)
            }
    }

    /**
     * Converts [body] into nested maps.
     *
     * - an element that only contains text becomes a `String` entry in its parent;
     * - a self-closing element becomes a map holding its attributes;
     * - repeated sibling elements are grouped as `{"arrayOfType": name, "children": [...]}`;
     * - text mixed with child elements, and CDATA sections, are stored under the key `"CDATA"`.
     *
     * @param body XML text; it is trimmed and whitespace between `>` and `<` is removed
     * @param options conversion switches, [Options.DEFAULT] when omitted
     * @return the root map; empty when [body] is blank
     */
    @JvmOverloads fun xmlToMap(
        body: String, options: Options = Options.DEFAULT
    ): Map<String, Any?> {
        val root: MutableMap<String, Any?> = mutableMapOf()
        // deque holds the nodes to return to on closing tags; breadcrumb holds, in parallel, the
        // element names (suffixed with "(array)" for array elements, or the marker "children").
        val deque: ArrayDeque<MutableMap<String, Any?>> = ArrayDeque()
        val breadcrumb: ArrayDeque<String> = ArrayDeque()
        var currentLocalName = ""
        var substring = body.trim { it <= ' ' }.replace(">\\s+<".toRegex(), "><")
        val namespaces: MutableList<String> = mutableListOf("xmlns")
        var currentNode = root
        while (substring.isNotEmpty()) {
            // Each `?.also { } ?:` link handles one kind of token; the first matching one wins.
            CDATA.find(substring)?.also { cdataMatcher ->
                currentNode["CDATA"] = cdataMatcher.groups[1]?.value
                substring = substring.substring((cdataMatcher.groups[0]?.range?.last ?: 0) + 1)
            } ?: OPENING_CLOSING_TAG.find(substring)?.also { openingClosingTagMatcher ->
                // Self-closing element: its value is simply the map of its attributes.
                val localName = toCamelCase(
                    (if (options.ignoreNameSpace) openingClosingTagMatcher.groups[1]?.value?.replace(
                        "^[a-zA-Z](?:[a-zA-Z0-9_-])*:".toRegex(), ""
                    ) else openingClosingTagMatcher.groups[1]?.value) ?: ""
                )
                val attrs: Map<String, Any?> = mutableMapOf()
                attrSetFunc(
                    openingClosingTagMatcher.groups[0]?.value ?: "", attrs, options.ignoreNameSpace, namespaces
                )
                currentNode[localName] = attrs
                substring = substring.substring((openingClosingTagMatcher.groups[0]?.range?.last ?: 0) + 1)
            } ?: CLOSING_TAG.find(substring)?.also { closingTagMatcher ->
                if (alreadyArrayTyped(deque.last())) {
                    // The element being closed is the array container itself: unwind its frames.
                    if (breadcrumb.last() == "children") { // end of inlined array
                        breadcrumb.removeLast()
                        deque.removeLast()
                    }
                    breadcrumb.removeLast()
                    deque.removeLast()
                }
                currentNode = if (breadcrumb.last().endsWith("(array)")) {
                    // Leaving an array element: drop it and go back to the frame below.
                    deque.removeLast()
                    deque.last()
                } else {
                    deque.removeLast()
                }
                breadcrumb.removeLast()
                substring = substring.substring((closingTagMatcher.groups[0]?.range?.last ?: 0) + 1)
            } ?: OPENING_TAG.find(substring)?.also { openingTagMatcher ->
                val attrs: Map<String, Any?> = mutableMapOf()
                attrSetFunc(openingTagMatcher.groups[0]?.value ?: "", attrs, options.ignoreNameSpace, namespaces)
                val localName = toCamelCase(
                    (if (options.ignoreNameSpace) openingTagMatcher.groups[1]?.value?.replace(
                        "^[a-zA-Z](?:[a-zA-Z0-9_-]*):".toRegex(), ""
                    ) else openingTagMatcher.groups[1]?.value) ?: ""
                )
                currentLocalName = localName
                if (alreadyArrayTyped(currentNode) && currentNode["arrayOfType"] != currentLocalName) {
                    // A differently named sibling ends the current array: step out of it first.
                    if (breadcrumb.last() == "children") { // end of inlined array
                        deque.removeLast()
                        breadcrumb.removeLast()
                    }
                    currentNode = deque.removeLast()
                    breadcrumb.removeLast()
                }
                if (alreadyArrayTyped(currentNode)) {
                    // One more element of an array that already exists.
                    val intermediaryNode: MutableMap<String, Any?> = mutableMapOf()
                    (currentNode["children"] as MutableList<Map<String, Any?>?>?)!!.add(intermediaryNode)
                    saveAttributes(attrs, intermediaryNode)
                    deque.add(intermediaryNode)
                    breadcrumb.add("$localName(array)")
                    currentNode = intermediaryNode
                } else if (!breadcrumb.isEmpty() && options.shouldEnforceArray(breadcrumb.toList()) ||
                    foundNodeHavingDifferentAttributes(localName, attrs, currentNode)
                ) {
                    // Either the caller forces an array here, or the name repeats an existing entry.
                    val cloned: MutableMap<String, Any?> = mutableMapOf()
                    val currentValue = currentNode[currentLocalName]
                    // "Inlined": the array lives under its own key next to other entries, instead
                    // of the enclosing node itself being turned into the array container.
                    val inlined = currentNode.size > 2 || currentNode.size == 1 && currentLocalName != currentNode.keys.iterator().next()
                    val thereIsAlreadyAList = currentValue is Map<*, *> && alreadyArrayTyped(currentValue as Map<String, Any?>)
                    val children: MutableList<Any?>?
                    if (thereIsAlreadyAList) {
                        children = (currentValue as Map<String?, Any?>?)!!["children"] as MutableList<Any?>?
                    } else {
                        children = mutableListOf()
                        if (currentValue is MutableMap<*, *>) {
                            // The previously stored single value becomes the first array element,
                            // and the attributes kept on the enclosing node move onto it.
                            children.add(currentValue)
                            currentNode.entries
                                .filter { (k) -> k.startsWith("_attr_") }
                                .forEach { (k, v) ->
                                    (currentValue as MutableMap<String, Any?>)[k] = v
                                    currentValue[k.substring("_attr_".length)] = v
                                    currentNode.remove(k)
                                    currentNode.remove(k.substring("_attr_".length))
                                }
                        }
                        if (currentValue is String) children.add(mutableMapOf(currentLocalName to currentValue))
                        cloned["arrayOfType"] = currentLocalName
                        cloned["children"] = children
                        if (inlined) {
                            deque.add(currentNode)
                            breadcrumb.add(currentLocalName)
                            deque.add(cloned)
                            breadcrumb.add("children")
                            currentNode[currentLocalName] = cloned
                        } else {
                            deque.add(currentNode)
                            breadcrumb.add(currentLocalName)
                            currentNode.putAll(cloned)
                            currentNode.remove(currentLocalName)
                            if (currentValue is Map<*, *>) {
                                // Everything else stored on the container moves to the first element.
                                val finalCurrentNode = currentNode
                                currentNode.keys.toSet().forEach { key: String ->
                                    if ("children" != key && "arrayOfType" != key) {
                                        (currentValue as MutableMap<String?, Any?>)[key] = finalCurrentNode[key]
                                        finalCurrentNode.remove(key)
                                    }
                                }
                            }
                        }
                    }
                    val intermediaryNode: MutableMap<String, Any?> = mutableMapOf()
                    children!!.add(intermediaryNode)
                    saveAttributes(attrs, intermediaryNode)
                    deque.add(intermediaryNode)
                    breadcrumb.add("$localName(array)")
                } else {
                    // Plain child element: its attributes are stored on the enclosing node.
                    saveAttributes(attrs, currentNode)
                    deque.add(currentNode)
                    breadcrumb.add(localName)
                }
                val newValue: MutableMap<String, Any?> = if (breadcrumb.last().endsWith("(array)")) deque.last() else mutableMapOf()
                if (!breadcrumb.last().endsWith("(array)")) currentNode[localName] = newValue
                currentNode = newValue
                substring = substring.substring((openingTagMatcher.groups[0]?.range?.last ?: 0) + 1)
            } ?: run {
                // Text run: everything up to the next tag.
                val data = substring.substring(0, substring.indexOf('<'))
                val parentNode = deque.last()
                if (breadcrumb.last().endsWith("(array)")) {
                    currentNode[currentLocalName] = data
                } else if (parentNode.containsKey(currentLocalName) &&
                    parentNode[currentLocalName] !is String &&
                    currentNode.isEmpty()
                ) parentNode[currentLocalName] = data else currentNode["CDATA"] = when(currentNode["CDATA"]) {
                    // Mixed content: accumulate the text runs under "CDATA".
                    is String -> listOf(currentNode["CDATA"], data)
                    is List<*> -> (currentNode["CDATA"] as List<*>) + data
                    else -> (currentNode["CDATA"]?.toString() ?: "") + data
                }
                substring = substring.substring(substring.indexOf('<'))
            }
        }
        return root.apply { cleanAttributesPrefixes(this, options.prefixAttributesWithAttr) }
    }

    /**
     * Conversion switches.
     *
     * @property ignoreNameSpace strip namespace prefixes and drop `xmlns` declarations
     * @property prefixAttributesWithAttr keep attributes under `_attr_name` instead of `name`
     * @property shouldEnforceArray evaluated for every opening tag that has at least one open
     *   ancestor, with a copy of the breadcrumb of those ancestors; returning `true` forces the
     *   element to be stored as an array element
     */
    class Options(
        var ignoreNameSpace: Boolean = false,
        var prefixAttributesWithAttr: Boolean = false,
        var shouldEnforceArray: (List<String>) -> Boolean = { false }
    ) {
        companion object {
            val DEFAULT = Options()
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment