Skip to content

Instantly share code, notes, and snippets.

@stla
Forked from dfkaye/json-normalize.js
Created July 4, 2021 09:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stla/1056b9897cc2b55a36a7621d5a6d02b6 to your computer and use it in GitHub Desktop.
Save stla/1056b9897cc2b55a36a7621d5a6d02b6 to your computer and use it in GitHub Desktop.
normalize a JSON string (add correct quotes, remove comments, blank lines, and so on)
// 22 Feb 2020 TODO:
// needs trailing comma fix (allow trailing comma in arrays and object literals).
// see JWCC at https://nigeltao.github.io/blog/2021/json-with-commas-comments.html
export { normalize };
// var normalize = (function() {
// ///////////////////////////////////////////////////
// REVISE THIS FILE, PUT THE MAIN FUNCTION AT TOP,
// HELPERS BELOW. A LOT OF THE CHAINED FUNCTIONS ARE
// HARD TO READ WITH ALL THE INTERSPERSED COMMENTS.
// ///////////////////////////////////////////////////
/**
* Helper functions and queries for normalize() method.
*/
// Patterns applied, in this order, by normalize(). All carry the `g` flag so
// every occurrence in the input is processed.

// A newline, any run of whitespace, then another newline; normalize()
// collapses each match to a single '\n'.
const BLANK_LINES = /([\n](\s)*(?:\n))/g;
// Optional leading quote characters, exactly one whitespace/word character,
// zero or more literal `]`, then `//` and everything up to a newline, `"`, `}`
// or `^`. Because one whitespace/word character is required directly before
// the slashes, the `://` of a URL does not match.
// NOTE(review): `[\s\w]]*` reads like a typo for `[\s\w]*`, but that form would
// also match the `//` in "https://..." - confirm before changing.
const LINE_COMMENTS = /[\"\']*(?:[\s\w]]*)(\/\/[^\n^\"^\}]*)/g;
// Attempt to handle /* */ and /** javadoc style * line * line2 comments */
// Optional leading quotes and whitespace, `/` plus one or more `*`, then text
// through the next `*`, then text through the next `/`.
const MULTILINE_COMMENTS = /[\"\']*(?:\s*)(\/\*+)([^\*]*[\*])([^\/]*[\/])/g;
// One non-backslash character, a single-quoted run, then any trailing
// characters other than `}`, `:`, `,` or `^`. The leading non-backslash
// character is what lets an escaped apostrophe (\') be skipped.
const SINGLE_QUOTES = /([^\\][\'](?:[^\']*)[\'](?:[^\}^\:^\,]*))/g;
// A `,`, `:` or `{`, optional whitespace, then a run of word characters -
// i.e. a bare key or bare value.
const UNQUOTED_WORDS = /(?:(\,|\:|\{)\s*)([\w]+)/g;
/**
 * Decides how a bare (unquoted) token should be written in the output.
 *
 * - `true` / `false` and plain numbers are returned unchanged.
 * - `null`, `undefined` and `NaN` become an empty quoted string (`""`).
 * - Anything else is wrapped in double quotes.
 *
 * Every test is anchored to the whole token, so words that merely contain a
 * keyword (e.g. `nullable`, `construed`) are quoted like any other word.
 *
 * @param {string} e the bare token
 * @returns {string} the token as it should appear in the normalized text
 */
function addQuotes(e) {
  // return booleans unchanged
  if (/^(?:false|true)$/.test(e)) {
    return e;
  }
  // return numbers unchanged (digits with an optional decimal point)
  if (/^\d*\.?\d+$/.test(e)) {
    return e;
  }
  // replaces null, undefined, and NaN with empty string
  if (/^(?:null|undefined|NaN)$/.test(e)) {
    return '""';
  }
  return `"${e}"`;
}
/**
 * Runs every word-character run inside a matched fragment through
 * addQuotes(), leaving the surrounding punctuation and whitespace untouched.
 *
 * @param {string} m fragment matched by the unquoted-words pattern
 * @returns {string} the fragment with each word rewritten by addQuotes()
 */
function replaceUnquoted(m) {
  const wordRuns = /\w+/g;
  const rewritten = m.replace(wordRuns, addQuotes);
  return rewritten;
}
/**
 * @method normalize Attempts to fix a JSON string with bare keys (restore
 * missing quotes) and single-quoted keys and values, and remove line comments,
 * block comments, blank lines, etc.
 *
 * The steps run in a fixed order: strip line comments, strip block comments,
 * collapse blank lines, convert single quotes to double quotes, quote bare
 * words, quote numbers written with a leading zero, trim.
 *
 * All steps are regex based and do not track whether a match sits inside a
 * quoted string, so unusual string contents (for example `"a, 01, b"`) can
 * still be rewritten.
 *
 * @param {string} jsonText
 * @returns {string}
 */
function normalize(jsonText) {
  // A comment match that starts with a quote character is text inside a
  // string literal, not a real comment: keep it. Otherwise drop it.
  const dropUnlessQuoted = (match) => (/["']/.test(match[0]) ? match : '');

  // A whole number token written with a leading zero (01, 007, -02), i.e. one
  // that sits between structural characters. JSON numbers may not have
  // leading zeros, so these are emitted as strings. Requiring a structural
  // character (or the start/end of the text) on both sides keeps ordinary
  // numbers such as 101 or 2012 and quoted text such as "09:05" intact.
  const leadingZeroNumbers = /(^\s*|[\[{,:]\s*)(-?0\d+)(?=\s*(?:[,:\]}]|$))/g;

  const fixed = jsonText
    // 18 Oct 2018 - remove comments and blank lines
    // 30 Oct 2018 - preserve quoted comments
    .replace(LINE_COMMENTS, dropUnlessQuoted)
    .replace(MULTILINE_COMMENTS, dropUnlessQuoted)
    .replace(BLANK_LINES, '\n')
    // 17,18 oct 2018 - fix single quotes
    // 15 feb 2019 - escaped apostrophes
    .replace(SINGLE_QUOTES, (m) => {
      /*
       * Replace leading and trailing single quotes with double quotes, trim
       * quoted spaces, ignore quoted apostrophes.
       */
      const t = m.trim();
      // Splitting on the trimmed text recovers the leading/trailing
      // whitespace so it can be put back around the converted token.
      const p = m.split(t);
      const r = p[0] + t.replace(/^[\']/, '"') + p[1];
      return r.replace(/([\'])(?:[\s]*)$/, (e) => {
        return '"' + (e.length > 1 ? e.substring(1) : '');
      });
    })
    // 17 october 2018 - add missing quotes
    .replace(UNQUOTED_WORDS, replaceUnquoted)
    // 28 December 2019 - fix [01] and { 01: 01 }
    // e.g., replace 01 with "01"
    .replace(leadingZeroNumbers, (match, lead, digits) => `${lead}"${digits}"`)
    // trim it
    .trim();

  return fixed;
}
// return normalize;
//})();
/*
28 December 2019 - fix [01] and { 01: 01 }
var tests = [
'[01]',
normalize('[01]'),
'{ 01: 01 }',
normalize('{ 01: 01 }')
];
var results = tests.map(function(test) {
var result = "OK: " + test;
try {
JSON.parse(test);
} catch (e) {
result = 'Error: ' + test + '; ' + e;
}
return result;
});
console.log(JSON.stringify(results, null, 2));
*/
/*
[
"Error: [01]; SyntaxError: JSON.parse: expected ',' or ']' after array element at line 1 column 3 of the JSON data",
"OK: [\"01\"]",
"Error: { 01: 01 }; SyntaxError: JSON.parse: expected property name or '}' at line 1 column 3 of the JSON data",
"OK: { \"01\": \"01\" }"
]
*/
import { normalize } from '/src/data/normalize.js';
// Behavior spec for normalize(): a group of single-purpose cases followed by
// one end-to-end case. Each case feeds a template-literal fixture to
// normalize() and compares the returned text (or the parsed result).
describe('normalize(JSONString)', function () {
// Expect `chai` to be global along with `describe` and `it`.
var assert = chai.assert;
describe('single transform', function () {
it('trims input', () => {
var json = `
{}
`;
var result = normalize(json);
assert.strictEqual(result, '{}');
});
it('removes line comments', () => {
var json = `
// line comment 1
{ // line comment 2
// line comment 3
}
// line comment 4
`;
var actual = normalize(json);
var expected = `{
}`;
assert.strictEqual(actual, expected);
});
it('removes multiline comments', () => {
var json = `
/*
multiline comment 1
*/
{/* multiline comment 2
// line comment 3
*/
}
/* multi comment 4
*/
`;
var actual = normalize(json);
var expected = `{
}`;
assert.strictEqual(actual, expected);
});
it('removes javadoc comments', () => {
var json = `
/**
* javadoc 1
*/
{/**
* javadoc 2
*/
}
/**
* javadoc 3
*/
`;
var actual = normalize(json);
var expected = `{
}`;
assert.strictEqual(actual, expected);
});
// Comment markers inside string values must survive untouched.
it('ignores quoted comments', function () {
var json = `{ "line": " // should remain ", "multi": " /* should remain */", "javadoc": "/** should remain * in place. */" }`;
var result = normalize(json);
assert.strictEqual(result, json);
});
// The `//` of a URL must not be treated as the start of a line comment.
it('ignores urls', function () {
var json = `{ "url": "https://anything" }`;
var result = normalize(json);
assert.strictEqual(result, json);
});
it('replaces single quoted entries with double quoted entries', () => {
var json = `
{
'name': 'first'
}
`;
var actual = normalize(json);
var expected = `{
"name": "first"
}`;
assert.strictEqual(actual, expected);
});
it('removes blank lines', () => {
var json = `
{
}
`;
var actual = normalize(json);
var expected = `{
}`;
assert.strictEqual(actual, expected);
});
it('adds double quotes to unquoted keys', () => {
var json = `
{
key: "value",
key2: "21,213"
}
`;
var actual = normalize(json);
var expected = `{
"key": "value",
"key2": "21,213"
}`;
assert.strictEqual(actual, expected);
});
it('replaces null, undefined, and NaN with empty strings', () => {
var json = `
{
"null": null,
"undefined": undefined,
"NaN": NaN
}
`;
var actual = normalize(json);
var expected = `{
"null": "",
"undefined": "",
"NaN": ""
}`;
assert.strictEqual(actual, expected);
});
it('ignores unquoted booleans', function () {
var json = `{ "f": false, "t": true }`;
var result = normalize(json);
assert.strictEqual(result, json);
});
it('ignores unquoted numbers', function () {
var json = `{ "9": 9, "-0": -0, ".2": .2 }`;
var result = normalize(json);
assert.strictEqual(result, json);
});
// NOTE: inside a template literal `\'` evaluates to a plain `'`, so the two
// values below are the same string by the time normalize() sees them.
it('preserves quoted apostrophes', function() {
var json = `{ "escaped": "Joe\'s", "not-escaped": "Joe's" }`;
var result = normalize(json);
assert.strictEqual(result, json);
});
});
// End-to-end: one document that needs every fix at once, then parsed with
// JSON.parse to check the individual values.
describe('bulk transform', function () {
var textContent = `
{
// line comment
/*
multi line
*/
/**
* javadoc
*/
"multi": " /* should remain */ ",
"line": " // should remain ",
"javadoc": " /** should remain * in place */",
apostrophes: {
"escaped": "Joe\'s",
"not-escaped": "Joe's"
},
applicant: {
name : {
first : "my first",
'last' : 'my last'
},
"address" : {
street1 : '1234 Fifth St.',
'street2' : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : '12345-6789'
}
},
merchantURL: "https://merchant/url"
}
`;
// Evaluated once while the suite is being defined; both cases below share it.
var result = normalize(textContent);
it('should process without throwing or runaway backtracking', () => {
assert.ok(result);
});
it('should parse result', () => {
var data = JSON.parse(result);
assert.equal(data['multi'], ' /* should remain */ ');
assert.equal(data['line'], ' // should remain ');
assert.equal(data['javadoc'], ' /** should remain * in place */');
assert.equal(data["apostrophes"]["not-escaped"], "Joe\'s");
assert.equal(data.applicant.name.first, 'my first');
assert.equal(data.applicant.name.last, 'my last');
assert.equal(data.applicant.address.street1, '1234 Fifth St.');
assert.equal(data.applicant.address.street2, 'Suite Sixteen');
assert.strictEqual(data.applicant.address.street3, '', 'undefined value should be replaced with empty string');
assert.equal(data.applicant.address.city, 'The Citu');
assert.strictEqual(data.applicant.address.state, '', 'null should be replaced with empty string');
assert.equal(data.applicant.address.postalCode, '12345-6789');
assert.equal(data.merchantURL, "https://merchant/url");
});
});
});
// 21 october 2018 note:
// API emerging as:
// + JSON.normalize(string) -> string
// + JSON.path(object) -> map
// + JSON.revive(pathMap) -> object
// WORK IN PROGRESS
// 14 October 2018
// new gig use case:
// + convert json to path-map, then
// + make html with the path-map
~(function() {
  // 22 October 2018
  /**
   * Renders one `<input>` element for an entry of a path map.
   *
   * @param {Object} map lookup of path string -> value
   * @param {*} key path to render; `null`, `undefined` (or exactly those
   *   words) produce an empty name
   * @returns {string} the markup, preceded and followed by a newline; the
   *   value is empty when `map` has no own entry for the name
   */
  function template(map, key) {
    const text = String(key);
    // Anchored, so only the exact words are blanked - a path that merely
    // contains "null" or "undefined" keeps its name.
    const name = /^(?:null|undefined)$/.test(text) ? '' : text;
    // Own properties only: a name such as "toString" must not pick up an
    // inherited member of the map object.
    const value = Object.prototype.hasOwnProperty.call(map, name) ? map[name] : '';
    // NOTE(review): name and value are interpolated without HTML escaping;
    // a value containing `"` will break the attribute. Escape before using
    // this with untrusted data.
    return `\n<input name="${ name }" value="${ value }">\n`;
  }

  // Present key: prints the stored value.
  const test = template({ 'path.to.name': 'should work' }, 'path.to.name');
  console.log(test);
  // Missing key: prints an empty value.
  const test2 = template({ 'path.to.nowhere': 'should not print this' }, 'path.to.name');
  console.log(test2);
  const test3 = template({ 'path.to.nowhere': 'should see nowhere' }, 'path.to.nowhere');
  console.log(test3);
  // null key: empty name and empty value.
  const test4 = template({}, null);
  console.log( test4 );
  // null key is blanked even when the map happens to have a "null" entry.
  const name = null;
  const test5 = template({ null: 'nullity'}, name);
  console.log( test5 );
})();
/////////////////////////////////////////////////////////////////////////////////////
// 15 Oct
// more wip
// fix invalid json
/*
before:
, w
{ w
' or { plus any space then word characters
after:
" : "
" : {
*/
// Deliberately malformed JSON-like sample used by the steps below: it mixes
// line/block/javadoc comments, bare keys, single-quoted keys and values, and
// bare `undefined` / `null` values.
// NOTE: inside a template literal `\'` evaluates to a plain `'`.
var textContent = `
{
// line comment
/*
multi line
*/
/**
javadoc
*/
"multi": " /* should remain */ ",
"line": " // should remain ",
'so\'me' : entry,
applicant: {
name : {
first : "my first",
'last' : 'my last'
},
"address" : {
street1 : '1234 Fifth St.',
'street2' : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : '12345-6789'
}
}
}
`;
// Print the untouched input before any clean-up step runs.
console.info('start');
console.log(textContent);
// 18 Oct 2018 - remove comments and blank lines
// 30 Oct 2018 - preserve quoted comments
console.info('remove comments');
{
  // Logs every candidate match, then keeps it only when it starts with a
  // quote character (i.e. the "comment" is really part of a string value).
  const dropUnlessQuoted = (match) => {
    console.info(match);
    if (/[\"\']/.test(match[0])) {
      return match;
    }
    return '';
  };

  // Step 1: line comments.
  const withoutLineComments = textContent.replace(/[\"\']*(?:\s*)(\/\/[^\n^\"^\}]*)/g, dropUnlessQuoted);
  // Step 2: multi-line comments.
  const withoutBlockComments = withoutLineComments.replace(/([\"\']*(?:\s*)(\/\*+)[^\*]*(\*\/))/gm, dropUnlessQuoted);
  // Step 3: collapse blank lines into single newlines.
  textContent = withoutBlockComments.replace(/([\n]?(\s)*(?:\n))/g, '\n');
}
console.log(textContent);
/*
{
"multi": " /* should remain *\/ ",
"line": " // should remain ",
'so'me' : entry,
applicant: {
name : {
first : "my first",
'last' : 'my last'
},
"address" : {
street1 : '1234 Fifth St.',
'street2' : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : '12345-6789'
}
}
}
*/
// 17,18 oct 2018
// fix single quotes
console.info('fix quotes');
// A single-quoted run plus any trailing characters up to `}`, `:`, `,` or `^`.
var q = /([\'](?:[^\']*)[\'](?:[^\}^\:^\,]*))/g;
var t = textContent.replace(q, function (m) {
  console.info("_" + m + "_");
  // Opening quote first...
  var opened = m.replace(/^[\']/, '"');
  // ...then the closing quote, keeping whatever whitespace trailed it.
  return opened.replace(/([\'])(?:[\s]*)$/, function (e) {
    if (e.length > 1) {
      return '"' + e.substring(1);
    }
    return '"';
  });
});
console.warn(t);
/*
{
"multi": " /* should remain *\/ ",
"line": " // should remain ",
"so'me" : entry,
applicant: {
name : {
first : "my first",
"last" : "my last"
},
"address" : {
street1 : "1234 Fifth St.",
"street2" : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : "12345-6789"
}
}
}
*/
// 17 october 2018
// success
// add missing quotes
/*
16:59:17.972 "{ name: \"value\", address: { city: 'CITY' } }".replace(/(?:(\,|\:|\{)\s*)([\w]+)/g, (m,o,p,i,j) => { return m.replace(/\w+/g, e => { return '"' + e + '"' }); });
16:59:17.923 "{ \"name\": \"value\", \"address\": { \"city\": 'CITY' } }"
*/
/**
 * Wraps a bare word in double quotes; the exact words `null` and `undefined`
 * become an empty quoted string instead.
 *
 * The test is anchored to the whole word so that words which merely contain
 * "null" or "undefined" (e.g. `nullable`) keep their text.
 *
 * @param {string} e the bare word
 * @returns {string} the double-quoted word, or `""` for null/undefined
 */
function addQuotes(e) {
  const text = /^(?:null|undefined)$/.test(e) ? '' : e;
  return '"' + text + '"';
}
/**
 * Passes each run of word characters in the matched fragment to addQuotes(),
 * keeping the punctuation and whitespace around it as they were.
 *
 * @param {string} m fragment such as `: value` or `{ key`
 * @returns {string} the fragment with its words rewritten by addQuotes()
 */
function replaceUnquoted(m) {
  const wordRuns = /\w+/g;
  const rewritten = m.replace(wordRuns, addQuotes);
  return rewritten;
}
// Quote every bare word that follows `,`, `:` or `{`, then print the result.
var fixed = t.replace(/(?:(\,|\:|\{)\s*)([\w]+)/g, function (fragment) {
  return replaceUnquoted(fragment);
});
console.log(fixed);
/*
{
"multi": " /* should remain *\/ ",
"line": " // should remain ",
"so'me" : "entry",
"applicant": {
"name" : {
"first" : "my first",
"last" : "my last"
},
"address" : {
"street1" : "1234 Fifth St.",
"street2" : "Suite Sixteen",
"street3" : "",
"city" : "The Citu",
"state" : "",
"postalCode" : "12345-6789"
}
}
}
*/
// Parse the repaired text; this throws if any clean-up step left invalid JSON.
var data = JSON.parse(fixed);
// 18 oct 2018
{
  // Pull out the two nested records once, then report every leaf value.
  const { name, address } = data.applicant;
  console.warn(
    data["so'me"],
    name.first,
    name.last,
    address.street1,
    address.street2,
    address.street3,
    address.city,
    address.state,
    address.postalCode
  );
}
/*
entry my first my last 1234 Fifth St. Suite Sixteen The Citu 12345-6789
*/
// next up: convert data to path-map...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment