Skip to content

Instantly share code, notes, and snippets.

@nberlette
Last active March 24, 2024 13:53
Show Gist options
  • Save nberlette/d00faf7253a5f76ec2893cb7a51647ec to your computer and use it in GitHub Desktop.
Save nberlette/d00faf7253a5f76ec2893cb7a51647ec to your computer and use it in GitHub Desktop.
XMLFormatter
/** Line-ending sequences accepted by the formatter's `newLine` option. */
export enum EOL {
/** Carriage return followed by line feed. */
CRLF = "\r\n",
/** Carriage return only. */
CR = "\r",
/** Line feed only. */
LF = "\n",
}
/**
* Configuration accepted by `XMLFormatter`. Every field is optional; omitted
* fields fall back to the values in `XMLFormatter.options`.
*/
export interface Options {
/** Line-ending sequence; must be one of the `EOL` values ("\r\n", "\r", "\n"). */
newLine?: EOL | `${EOL}`;
/** Line width; validated to lie in 0..1000 by the `lineWidth` setter. */
lineWidth?: number;
/** Spaces per indent level when `useTabs` is false; validated to lie in 0..8. */
tabSize?: number;
/** Indent with one tab character per level instead of spaces. */
useTabs?: boolean;
/** When formatting, break before each `xmlns:` / `xmlns=` attribute. */
splitNS?: boolean;
/** Append `newLine` to the end of the produced output. */
finalNewLine?: boolean;
/** Strip `<!-- ... -->` comments when minifying. */
removeComments?: boolean;
/** Verbosity flag; coerced to a boolean by its setter. */
verbose?: boolean;
/** When true, `format` logs the split document parts via `console.log`. */
debug?: boolean;
}
/**
 * Pretty-prints or minifies XML and HTML markup.
 *
 * The algorithm is regex based (it does not build a DOM) and is derived from
 * pretty-data: the document is minified, cut into pieces in front of every
 * `<` (and, optionally, every `xmlns` attribute), and each piece is emitted
 * on its own indented line according to what kind of tag it contains.
 *
 * @see https://github.com/vkiryukhin/pretty-data
 */
export class XMLFormatter {
  /** Default option values, used for every option the caller omits. */
  static readonly options = {
    useTabs: false,
    splitNS: true,
    tabSize: 4,
    lineWidth: 80,
    newLine: "\n",
    finalNewLine: true,
    removeComments: false,
    verbose: false,
    debug: false,
  } satisfies Options;

  static #default?: XMLFormatter;

  /** Lazily created shared instance configured with the default options. */
  static get default(): XMLFormatter {
    return XMLFormatter.#default ??= new XMLFormatter(XMLFormatter.options);
  }

  /**
   * Pretty-prints `xml` with a throw-away formatter.
   *
   * @param xml Markup to format.
   * @param options Formatter options; defaults to {@link XMLFormatter.options}.
   * @returns The indented markup.
   */
  static format(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).format(xml);
  }

  /**
   * Minifies `xml` with a throw-away formatter.
   *
   * @param xml Markup to minify.
   * @param options Formatter options; defaults to {@link XMLFormatter.options}.
   * @returns The minified markup.
   */
  static minify(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).minify(xml);
  }

  /** Creates a new formatter from `options`. */
  static from(options: Options = XMLFormatter.options) {
    return new XMLFormatter(options);
  }

  /**
   * @param options Overrides for {@link XMLFormatter.options}. Options that
   *   are absent or explicitly `undefined` keep their default value.
   * @throws {TypeError} If `tabSize`/`lineWidth` is not a number or `newLine`
   *   is not a supported line ending.
   * @throws {RangeError} If `tabSize` or `lineWidth` is out of range.
   */
  constructor(options: Options = {}) {
    const merged: Options = { ...XMLFormatter.options, ...options };
    // Route every value through its setter so it is validated/coerced, but
    // skip `undefined` so `{ tabSize: undefined }` does not trip a setter.
    if (merged.newLine !== undefined) this.newLine = merged.newLine;
    if (merged.lineWidth !== undefined) this.lineWidth = merged.lineWidth;
    if (merged.useTabs !== undefined) this.useTabs = merged.useTabs;
    if (merged.tabSize !== undefined) this.tabSize = merged.tabSize;
    if (merged.splitNS !== undefined) this.splitNS = merged.splitNS;
    if (merged.verbose !== undefined) this.verbose = merged.verbose;
    if (merged.debug !== undefined) this.debug = merged.debug;
    if (merged.finalNewLine !== undefined) {
      this.finalNewLine = merged.finalNewLine;
    }
    if (merged.removeComments !== undefined) {
      this.removeComments = merged.removeComments;
    }
  }

  #lineWidth = 80;
  #newLine: EOL | `${EOL}` = EOL.LF;
  #removeComments = false;
  #splitNS = true;
  #tabSize = 4;
  #useTabs = false;
  #verbose = false;
  #debug = false;
  #finalNewLine = true;

  /** One level of indentation: a tab, or `tabSize` spaces. */
  public get indent(): string {
    return this.useTabs ? "\t" : " ".repeat(this.tabSize);
  }

  /** Whether to indent with tabs instead of spaces. */
  public get useTabs(): boolean {
    return this.#useTabs;
  }
  public set useTabs(value: boolean) {
    this.#useTabs = Boolean(value);
  }

  /** Number of spaces per indent level (0 to 8). */
  public get tabSize(): number {
    return this.#tabSize;
  }
  public set tabSize(value: number) {
    if (typeof value !== "number" || isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'tabSize' must be a number");
    }
    if (value < 0 || value > 8) {
      throw new RangeError("[XMLFormatter] 'tabSize' must be between 0 and 8");
    }
    this.#tabSize = value;
  }

  /** Whether `format` puts every `xmlns` attribute on its own line. */
  public get splitNS(): boolean {
    return this.#splitNS;
  }
  public set splitNS(value: boolean) {
    this.#splitNS = Boolean(value);
  }

  /** Whether `minify` strips comments by default. */
  public get removeComments(): boolean {
    return this.#removeComments;
  }
  public set removeComments(value: boolean) {
    this.#removeComments = Boolean(value);
  }

  /** Line width (0 to 1000). Stored and validated; not read by `format`/`minify`. */
  public get lineWidth(): number {
    return this.#lineWidth;
  }
  public set lineWidth(value: number) {
    if (typeof value !== "number" || isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'lineWidth' must be a number");
    }
    if (value < 0 || value > 1000) {
      throw new RangeError(
        "[XMLFormatter] 'lineWidth' must be between 0 and 1000",
      );
    }
    this.#lineWidth = value;
  }

  /** Line-ending sequence used between output lines. */
  public get newLine(): EOL | `${EOL}` {
    return this.#newLine;
  }
  public set newLine(value: EOL | `${EOL}`) {
    if (![EOL.CRLF, EOL.CR, EOL.LF].includes(value as EOL)) {
      throw new TypeError(
        "[XMLFormatter] 'newLine' must be either '\\r\\n', '\\r', or '\\n'.",
      );
    }
    this.#newLine = value;
  }

  /** Whether the output ends with a single `newLine`. */
  public get finalNewLine(): boolean {
    return this.#finalNewLine;
  }
  public set finalNewLine(value: boolean) {
    this.#finalNewLine = Boolean(value);
  }

  /** Verbosity flag. */
  public get verbose(): boolean {
    return this.#verbose;
  }
  public set verbose(value: boolean) {
    this.#verbose = Boolean(value);
  }

  /** When true, `format` logs the split parts with `console.log`. */
  public get debug(): boolean {
    return this.#debug;
  }
  public set debug(value: boolean) {
    this.#debug = Boolean(value);
  }

  /**
   * Pretty-prints `xml`, one tag per line, indented by nesting depth.
   *
   * The input is minified first; comments are always kept, regardless of
   * the `removeComments` option.
   *
   * @param xml Markup to format.
   * @returns The formatted markup, ending in `newLine` when `finalNewLine`
   *   is set.
   */
  public format(xml: string): string {
    const DELIM = "~::~";
    const DECLARATION = /<!/; // <!-- , <![CDATA[ , <!DOCTYPE
    const COMMENT_END = /-->/;
    const SECTION_END = /\]>/;
    const DOCTYPE = /!DOCTYPE/i;
    const OPEN_TAG = /<(\w|:)/;
    const CLOSE_TAG = /<\//;
    const SELF_CLOSE = /\/>/;
    const PROLOG = /<\?/;
    const XMLNS = /xmlns[:=]/;

    let source = this.minify(xml, false).replace(/</g, `${DELIM}<`);
    if (this.splitNS) {
      source = source.replace(/xmlns([:=])/g, `${DELIM}xmlns$1`);
    }
    const parts = source.split(DELIM);
    if (this.debug) console.log(parts);

    let inComment = false, level = 0, output = "";
    for (const [i, part] of parts.entries()) {
      const prev = parts[i - 1] ?? "";
      const hasOpen = OPEN_TAG.test(part);
      const hasClose = CLOSE_TAG.test(part);
      const hasSelfClose = SELF_CLOSE.test(part);
      const hasNS = XMLNS.test(part);

      if (DECLARATION.test(part)) {
        // Start of a comment / CDATA / DOCTYPE; it stays "open" unless the
        // same part also contains its terminator.
        output += this.#getIndent(level, part);
        inComment = !(
          COMMENT_END.test(part) || SECTION_END.test(part) ||
          DOCTYPE.test(part)
        );
      } else if (COMMENT_END.test(part) || SECTION_END.test(part)) {
        // Terminator of a previously opened comment / CDATA section.
        output += part;
        inComment = false;
      } else if (
        // <elm></elm>: keep the closing tag on the opening tag's line.
        /^<(\w|:)/.test(prev) &&
        /^<\/(\w|:)/.test(part) &&
        /^<[\w:\-.,/]+/.exec(prev)?.[0] ===
          /^<\/[\w:\-.,]+/.exec(part)?.[0]?.replace("/", "")
      ) {
        output += part;
        if (!inComment) --level;
      } else if (hasOpen && !hasClose && !hasSelfClose) {
        // <elm>: emit at the current depth, then descend.
        output += inComment ? part : this.#getIndent(level++, part);
      } else if (hasOpen && hasClose) {
        // <elm>...</elm>
        output += inComment ? part : this.#getIndent(level, part);
      } else if (hasClose) {
        // </elm>: ascend first so it lines up with its opening tag.
        output += inComment ? part : this.#getIndent(--level, part);
      } else if (hasSelfClose) {
        if (this.splitNS && hasNS) {
          // `xmlns... />`: the element head was counted as an open tag, so
          // undo that descent after emitting the attribute.
          output += inComment ? part : this.#getIndent(level--, part);
        } else {
          // <elm />
          output += inComment ? part : this.#getIndent(level, part);
        }
      } else if (PROLOG.test(part)) {
        // <?xml ... ?>
        output += this.#getIndent(level, part);
      } else if (this.splitNS && hasNS) {
        // xmlns attribute split onto its own line
        output += this.#getIndent(level, part);
      } else {
        output += part;
      }
    }

    // Every emitted line starts with a line break; drop the leading one and
    // normalise the tail to at most one final line break.
    output = output.replace(/^[\r\n]+/, "").replace(/[\r\n]+$/, "");
    if (this.finalNewLine) output += this.newLine;
    return output;
  }

  /**
   * Collapses insignificant whitespace in `xml`.
   *
   * @param xml Markup to minify.
   * @param removeComments Strip `<!-- ... -->` comments; defaults to the
   *   instance's `removeComments` option.
   * @returns The minified markup, ending in `newLine` when `finalNewLine`
   *   is set.
   */
  public minify(xml: string, removeComments = this.removeComments): string {
    // line breaks outside of CDATA sections and comments
    let result = this.#stripLineBreaks(xml);
    if (removeComments) {
      result = result.replace(
        /<![ \r\n\t]*(--([^-]|-[^-])*--[ \r\n\t]*)>/g,
        "",
      );
    }
    // insignificant whitespace between tags
    result = result.replace(/>\s*</g, "><");
    // spaces between attributes
    result = result.replace(/"\s+(?=[^\s]+=)/g, '" ');
    // spaces between the last attribute and tag close (>)
    result = result.replace(/"\s+(?=>)/g, '"');
    // spaces between the last attribute and tag close (/>)
    result = result.replace(/"\s+(?=\/>)/g, '" ');
    // spaces between the node name and the first attribute
    result = result.replace(
      /[^ <>="]\s+[^ <>="]+=/g,
      (match) => match.replace(/\s+/g, " "),
    );
    // trailing whitespace, then the optional final new line
    result = result.replace(/\s+$/, "");
    if (this.finalNewLine) result += this.newLine;
    return result;
  }

  /** Returns a line break followed by `level` indents and `trailingValue`. */
  #getIndent(level: number, trailingValue = ""): string {
    return `${this.newLine}${this.indent.repeat(level)}${trailingValue}`;
  }

  /**
   * Removes line breaks that touch other whitespace (i.e. indentation),
   * keeping those squeezed directly between non-blank characters. CDATA
   * sections and comments are copied through untouched.
   */
  #stripLineBreaks(xml: string): string {
    const SOLID = /\S|\r|\n/;
    const width = this.newLine.length;
    let output = "";
    // Terminator of the CDATA section / comment we are inside, if any.
    let closer: string | null = null;

    for (let i = 0; i < xml.length; i++) {
      const char = xml.charAt(i);

      if (closer !== null) {
        if (xml.startsWith(closer, i)) {
          output += closer;
          i += closer.length - 1;
          closer = null;
        } else {
          output += char;
        }
        continue;
      }

      if (char === "<") {
        const opener = xml.startsWith("<![CDATA[", i)
          ? "<![CDATA["
          : xml.startsWith("<!--", i)
          ? "<!--"
          : null;
        if (opener !== null) {
          closer = opener === "<!--" ? "-->" : "]]>";
          output += opener;
          // Skip the opener so `<!-->` is not mistaken for a full comment.
          i += opener.length - 1;
          continue;
        }
      }

      if (char === "\r") {
        // Look past the rest of a multi-character line ending.
        if (
          SOLID.test(xml.charAt(i - 1)) && SOLID.test(xml.charAt(i + width))
        ) {
          output += char;
        }
        continue;
      }
      if (char === "\n") {
        if (
          SOLID.test(xml.charAt(i - width)) && SOLID.test(xml.charAt(i + 1))
        ) {
          output += char;
        }
        continue;
      }

      output += char;
    }
    return output;
  }
}
/**
* Type-only namespace merged with the `XMLFormatter` class so that the
* options interface is also reachable as `XMLFormatter.Options`.
*/
export declare namespace XMLFormatter {
export type { Options };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment