Last active
May 20, 2021 10:10
-
-
Save Hashbrown777/5b316892a12670d0c4cc3a072bc39a71 to your computer and use it in GitHub Desktop.
Fixes improper close order in XML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build regexes without worrying about
//  - double-backslashing
//  - adding whitespace for readability
//  - adding in comments

/**
 * Strips the readability extras from one raw piece of a regex template:
 * block comments, line comments, and every newline together with the
 * whitespace that follows it.
 *
 * Escaped characters (a backslash plus the next character) are skipped while
 * searching for a comment opener, so `\/\/` and `\/\*` in the pattern survive.
 * Whitespace directly in front of a comment is removed with it; whitespace that
 * is not preceded by a newline or followed by a comment is kept.
 *
 * NOTE: every comment match has to begin at the start of the piece or at a
 * newline, so a second comment after another comment on the same line is not
 * removed.
 *
 * @param {string} piece - Raw (un-cooked) template text.
 * @returns {string} The piece with comments and layout whitespace removed.
 */
const clean = (piece) => (piece
    // Block comments. A run of asterisks only counts as comment body when it is
    // followed by something other than `*` or `/`, so `**/` correctly ends the
    // comment. The empty alternative of the closer lets an unterminated comment
    // run to the end of the piece.
    .replace(/((^|\n)(?:[^\/\\]|\/[^*\/]|\\.)*?)\s*\/\*(?:[^*]|\*+[^*\/])*(\*+\/|)/g, '$1')
    // Line comments: from an unescaped `//` to the end of that line.
    .replace(/((^|\n)(?:[^\/\\]|\/[^\/]|\\.)*?)\s*\/\/[^\n]*/g, '$1')
    // Layout: newlines and the indentation that follows them.
    .replace(/\n\s*/g, '')
);
/**
 * Template tag that builds a RegExp from a commented, multi-line pattern.
 *
 * Each raw template piece is passed through `clean` (so backslashes need no
 * doubling and comments/indentation are dropped), while interpolated values are
 * inserted verbatim between the pieces. The resulting RegExp has no flags.
 *
 * @param {{raw: readonly string[]}} strings - Template strings object; only `raw` is read.
 * @param {...(string|RegExp|*)} interpolations - Values to splice in uncleaned.
 *     A RegExp contributes its `source`; anything else is string-concatenated.
 * @returns {RegExp} The compiled pattern.
 */
const regex = ({raw}, ...interpolations) => {
    const pattern = interpolations.reduce(
        (built, insert, index) => {
            // A RegExp stringifies as "/source/flags"; only its source belongs in a pattern.
            const fragment = (insert instanceof RegExp) ? insert.source : insert;
            return `${built}${fragment}${clean(raw[index + 1])}`;
        },
        clean(raw[0])
    );
    return new RegExp(pattern);
};
// Compiled patterns keyed by `what`. Null-prototype so that keys such as
// "constructor" cannot resolve to inherited Object.prototype members.
const xfcwCache = Object.create(null);

/**
 * Builds the [pattern, replacement] pair for "algorithm 1": a tag that is
 * closed inside a parent element without having been opened there. The
 * replacement closes the parent before the stray close tag and reopens it
 * (with the same attributes) afterwards.
 *
 * Capture groups of the pattern:
 *   $1 parent tag name (must match `what`)
 *   $2 parent attributes (possibly empty)
 *   $3 ">" + parent content + "</"; content may hold text, self-closed tags and
 *      elements whose own content is plain text
 *   $4 (internal) name of such a text-only child element
 *   $5 name of the stray close tag; it must not start with $1
 *
 * The match starts after the parent's "<" and ends before the close tag's ">".
 *
 * @param {string} [what] - Regex source for the parent tag names that may be
 *     duplicated. Must not contain capturing groups, or the numbering above
 *     shifts. Defaults to any tag name.
 * @returns {[RegExp, string]} Pattern (cached per `what`) and replacement string.
 */
const xmlFixClosedWithin = (what = String.raw`[^\s<>"/\\=&]+`) => {
    // String.raw keeps `\s` and `\\` intact; in an ordinary string literal the
    // class would cook down to "[^s<>\"/\=&]+" and reject the letter "s".
    let problem = xfcwCache[what];
    if (!problem) {
        problem = regex`
            (?<=<)(${what})(\s(?:[^>"/]|"[^"]*")*|)(>
                (?:
                    [^<]
                    |<(?!\1[\s<>"/\\=&])([^\s<>"/\\=&]+)(?:
                        \s(?:
                            [^>"/]
                            |"[^"]*"
                        )*
                        |
                    )(?:
                        \/>
                        |>[^<]*<\/\4>
                    )
                )*
            <\/)(?!\1)([^\s<>"/\\=&]+)(?=>)
        `;
        xfcwCache[what] = problem;
    }
    return [problem, '$1$2$3$1></$5><$1$2'];
};
// Compiled patterns keyed by `what`. Null-prototype so that keys such as
// "constructor" cannot resolve to inherited Object.prototype members.
const xfowCache = Object.create(null);

/**
 * Builds the [pattern, replacement] pair for "algorithm 2": a child tag that is
 * opened inside a parent element but not closed before the parent closes. The
 * replacement closes the parent in front of the child's open tag and reopens
 * it (with the same attributes) right after that open tag.
 *
 * Capture groups of the pattern:
 *   $1 parent tag name (must match `what`)
 *   $2 parent attributes (possibly empty)
 *   $3 ">" + parent content preceding the child + "<"; content may hold text,
 *      self-closed tags, text-only elements and close tags not starting with $1
 *   $4 (internal) name of such a text-only element
 *   $5 child tag name
 *   $6 child attributes (possibly empty)
 *
 * A lookahead then requires that, up to the parent's "</$1>", only text, open
 * or self-closed tags other than $1/$5, and close tags not starting with $1 or
 * $5 occur, i.e. that the child is still open when the parent closes.
 *
 * @param {string} [what] - Regex source for the parent tag names that may be
 *     duplicated. Must not contain capturing groups, or the numbering above
 *     shifts. Defaults to any tag name.
 * @returns {[RegExp, string]} Pattern (cached per `what`) and replacement string.
 */
const xmlFixOpenedWithin = (what = String.raw`[^\s<>"/\\=&]+`) => {
    // String.raw keeps `\s` and `\\` intact; in an ordinary string literal the
    // class would cook down to "[^s<>\"/\=&]+" and reject the letter "s".
    let problem = xfowCache[what];
    if (!problem) {
        // Both "</name>" alternatives use the negated name class like every
        // other tag name here; without the "^" they could never match a real
        // close tag, so children containing nested elements were never fixed.
        problem = regex`
            (?<=<)(${what})(\s(?:[^>"/]|"[^"]*")*|)(>
                (?:
                    [^<]
                    |<(?!\1[\s<>"/\\=&])([^\s<>"/\\=&]+)(?:
                        \s(?:
                            [^>"/]
                            |"[^"]*"
                        )*
                        |
                    )(?:
                        \/>
                        |>[^<]*<\/\4>
                    )
                    |</(?!\1)(?:[^\s<>"/\\=&]+)>
                )*
            <)([^\s<>"/\\=&]+)(\s(?:[^>"/]|"[^"]*")*|)(?=>
                (?:
                    [^<]
                    |<(?!(?:\1|\5)[\s<>"/\\=&])[^\s<>"/\\=&]+(?:
                        \s(?:
                            [^>"/]
                            |"[^"]*"
                        )*
                        |
                    )\/?>
                    |</(?!\1|\5)(?:[^\s<>"/\\=&]+)>
                )*
            <\/\1>)
        `;
        xfowCache[what] = problem;
    }
    return [problem, '$1$2$3/$1><$5$6><$1$2'];
};
/**
 * Repeatedly applies a list of [pattern, replacement] fixes to `xml` until a
 * full pass leaves the text unchanged.
 *
 * Every pass calls `String.prototype.replace` once per fix, so with non-global
 * patterns (such as the default ones) a pass repairs at most one occurrence per
 * fix; documents with several problems need several passes.
 *
 * @param {string} xml - The markup to repair.
 * @param {Array<[RegExp|string, string|Function]>|string} [fixes] - Fix pairs to
 *     apply in order. A string is shorthand for both built-in fixes restricted
 *     to parent tag names matching that regex source. Defaults to both built-in
 *     fixes for any tag name.
 * @param {number} [maxChanges=10] - How many passes may alter the text before
 *     giving up.
 * @returns {string} The repaired markup.
 * @throws {Error} If the text is still changing after `maxChanges` altering passes.
 */
const fixXML = (xml, fixes = [xmlFixClosedWithin(), xmlFixOpenedWithin()], maxChanges = 10) => {
    const activeFixes = (typeof fixes === 'string' || fixes instanceof String)
        ? [xmlFixClosedWithin(fixes), xmlFixOpenedWithin(fixes)]
        : fixes;

    let repaired = xml;
    for (let changes = 0; ; ++changes) {
        const before = repaired;
        for (const [problem, fix] of activeFixes) {
            repaired = repaired.replace(problem, fix);
        }
        // A pass without any effect means there is nothing left to fix.
        if (repaired === before) {
            return repaired;
        }
        // This pass changed the text although the budget was already used up.
        if (changes >= maxChanges) {
            throw new Error(`Didn't manage to rectify the xml within ${maxChanges} changes`);
        }
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Algorithm 1: find nodes that were closed within a parent, but weren't opened there:
Close and reopen the parent around the child close tag.
Algorithm 2: find nodes that were opened within a parent, but weren't closed in time:
Close and reopen the parent around the child open tag.
Use both algorithms, but only allow duplicating tags for certain tagnames:
Assumes no CDATA, comments, or chevrons within attribute values.