Skip to content

Instantly share code, notes, and snippets.

@dfkaye
Last active May 26, 2022 22:51
Show Gist options
  • Save dfkaye/85e7029d684fa77819c3bd592d8e6537 to your computer and use it in GitHub Desktop.
Save dfkaye/85e7029d684fa77819c3bd592d8e6537 to your computer and use it in GitHub Desktop.
clean.js - cycle tolerant deep copy of an object or array; removes blank, null keys, merges the prototype chain
// 10 April 2022
// Cycle tolerant deep clean.
// Returns a copy of an object/array with any blank keys removed, as an
// alternative to using `delete` on an object's blank keys.
// Whereas JSON.stringify() and structuredClone() both allow this...
/*
JSON.stringify({
"": "empty",
undefined: "undefined",
null: "null"
});
// "{\"\":\"empty\",\"undefined\":\"undefined\",\"null\":\"null\"}"
structuredClone({
"": "empty",
undefined: "undefined",
null: "null"
});
// Object { "": "empty", undefined: "undefined", null: "null" }
*/
// Iterates over `enumerable` properties, own and inherited,
// effectively merging the object's prototype properties into
// the object instance.
// Returns functions and non-object values unchanged.
// 13 April 2022
// - replace visiting object with visiting array and corresponding index lookup
// 16 April 2022
// - prevent prototype pollution via:
// `JSON.parse('{ "__proto__": { "polluted": 1 } }')`, and
// `Object.defineProperty(o, '__proto__', {
// value: { pwned: true }
// })`.
//
// - pollution via prototype property is not filtered here.
// - pollution via "__proto__" property is not filtered here.
// - But this is still the debate:
// Should we nuke data with a prototype chain?
//
// 17 April 2022: Possible answer, feels messy:
// Add a block list as a second parameter, and avoid copying those names
// in the list to the new cleaned object. So, prevent "__proto__" or
// "constructor" by default, block specified names like "isAdmin", and
// allow the rest, but from any inherited level.
/**
 * Cycle tolerant deep clean.
 *
 * Returns a cleaned copy of `data`. Enumerable properties, own AND inherited,
 * are copied onto fresh plain objects/arrays, so the prototype chain is
 * effectively merged into the copy. Entries whose cleaned value is blank
 * (null, undefined, an empty or whitespace-only string, or an empty array)
 * are left out. Empty objects are kept.
 *
 * Primitives and functions are returned unchanged. Any other object is
 * reduced to its enumerable properties (for...in), whatever its type.
 *
 * When a value refers back to one of its own ancestors, the ORIGINAL
 * ancestor reference is stored instead of recursing, so
 * `clean(cycle).cycle === cycle`.
 *
 * @param {*} data - value to clean.
 * @param {string[]} [block=[]] - property names to skip at every level
 *   (array indexes included, since they are matched as strings).
 *   "__proto__" is always skipped, which prevents prototype pollution via
 *   `JSON.parse('{ "__proto__": { "polluted": 1 } }')` and via
 *   `Object.defineProperty(o, '__proto__', { value: { pwned: true } })`.
 * @returns {*} the cleaned copy, or `data` itself for primitives/functions.
 */
function clean(data, block = [/* keys */]) {
  // for...in only yields string keys, so compare block entries as strings.
  const names = block == null ? [] : [].concat(block).map(String);
  const blocked = new Set(['__proto__'].concat(names));

  // Objects currently being copied (the ancestors of the value in hand).
  const visiting = new Set();

  // Type-aware blank test. Coercing everything to a string (the previous
  // approach) dropped the legitimate strings "null" and "undefined", and
  // threw on Symbols and on objects with a non-callable `toString` key.
  function isBlank(value) {
    if (value == null) {
      return true;
    }
    if (typeof value === 'string') {
      return value.trim() === '';
    }
    return Array.isArray(value) && value.length === 0;
  }

  function visit(item) {
    if (Object(item) !== item || typeof item === 'function') {
      return item;
    }

    // Cycle: hand back the ancestor itself rather than recursing forever.
    if (visiting.has(item)) {
      return item;
    }

    visiting.add(item);

    const isArray = Array.isArray(item);
    const copy = isArray ? [] : {};

    for (const key in item) {
      if (blocked.has(key)) {
        continue;
      }

      const result = visit(item[key]);

      if (isBlank(result)) {
        continue;
      }

      if (isArray) {
        // push() compacts the array: removed entries leave no holes.
        copy.push(result);
      } else {
        copy[key] = result;
      }
    }

    // Siblings may legitimately share this object; only ancestors count.
    visiting.delete(item);

    return copy;
  }

  return visit(data);
}
/** 10 April 2022 - deep, cycle tests **/
console.group("deep");
// Build a chain of nested nodes, depth levels deep, each named by its level.
var deep = { "name": 0 };
var depth = 100;
var node = deep;
var i = 0;
while (i < depth) {
  node.node = { name: i + 1 };
  node = node.node;
  i++;
}
// Clean the whole chain, then show the copy and its first child's name.
var copy = clean(deep);
console.log(copy);
console.log(copy.node.name);
// 1
console.groupEnd("deep");
console.group("cycle");
// Smallest possible cycle: an object whose only property is itself.
var cycle = {};
cycle.cycle = cycle;
console.log(clean(cycle));
// The self-reference in the cleaned copy is expected to be the original object.
console.log(Object.is(clean(cycle).cycle, cycle));
// true
console.groupEnd("cycle");
// Console checks for three ways a "__proto__" entry can reach clean().
// NOTE(review): the expected outputs assume the clean() that skips the
// "__proto__" key is the one in scope — confirm before relying on them.
console.group("prototype pollution");
// First group of tests shows a customized ("polluted") prototype is not filtered.
// In an object literal, a `"__proto__": value` entry sets the new object's
// prototype instead of creating an own property.
var p = {
"__proto__": {
"name": "p",
"toString": function() { return this.name + " is customized" }
}
};
// The peculiar effect: although __proto__ is written inside p's literal, it is
// not an own property of p. p simply inherits `name` and `toString` from the
// object it names:
console.log(p.hasOwnProperty("__proto__"))
// false
console.log(p.__proto__.toString())
// "p is customized"
console.log(p.toString())
// "p is customized"
// That makes cleaning objects of this type... challenging.
// The inherited `name` and `toString` are enumerable, so they are expected to
// end up as own properties of the cleaned copy.
var c = clean(p);
console.log(c + "")
// "p is customized"
console.log(c.toString.toString())
// "function() { return this.name + " is customized" }"
// The copy's own prototype is expected to be untouched (native toString).
console.log(c.__proto__.toString.toString())
// "function toString() { [native code] }"
// Second group of tests shows a polluted prototype created from JSON.parse() is filtered.
// JSON.parse() creates an own property literally named "__proto__".
var x = JSON.parse('{ "__proto__": { "polluted": 1 } }')
var y = clean(x);
console.log("polluted:", "polluted" in y);
// "polluted: false"
// Third group shows prototype created from Object.defineProperty() is filtered.
// defineProperty() also creates an own, enumerable "__proto__" property here.
var oq = Object.defineProperty({}, '__proto__', {
value: { "polluted": "defpwned" },
enumerable: true,
configurable: false,
writable: false
});
var q = clean(oq);
console.log("polluted:", "polluted" in q);
// "polluted: false"
console.groupEnd("prototype pollution");
/* add rest of console tests here... */
// 29 November 2017
// moved tests to separate file - less clutter
// added expected output for each console statement
// 16 April 2022
// renamed from tests-in-console.js
// to console.tests.js
/** test it out **/
// Console-driven checks for clean(). Each console.log() is followed by a
// comment showing the output expected in a browser console.
// NOTE(review): the expected outputs appear to have been recorded in November
// 2017 (see the Date sample below) — confirm them against whichever clean()
// implementation is actually loaded.
console.info('** Array tests **')
// Empty nested arrays are expected to be removed, leaving an empty array.
console.log(clean([ [], [] ]))
// Array []
// Blank strings, empty arrays and empty objects are expected to be dropped;
// false is expected to be kept.
console.log(clean([ 1, 2, 3, {}, '5', 4, '', false, true, [], 'next', {}, {} ]))
// Array [ 1, 2, 3, "5", 4, false, true, "next" ]
// The inner array has a hole at index 1; the expected result is compacted.
console.log(clean([ 'a', [3, , 5, 8, [ 11, [], 99] ], 'c' ]))
/*
[…]
0: "a"
1: […]
0: 3
1: 5
2: 8
3: […]
0: 11
1: 99
length: 2
__proto__: Array []
length: 4
__proto__: Array []
2: "c"
length: 3
__proto__: Array []
*/
// Functions and built-in objects are expected to be returned unchanged.
console.info('** Function tests **')
console.log(clean(String))
// function String()
console.info('** Math tests **')
console.log(clean(Math))
// Math { … }
console.info('** RegExp tests **')
console.log(clean(new RegExp('test')))
// /test/
console.info('** Date tests **')
console.log(clean(new Date()))
// Date 2017-11-29T22:40:46.087Z
// Primitives and their wrapper objects are expected to be returned unchanged.
console.info('** String tests **')
console.log(clean('string'))
// string
console.log(clean(new String('String object instance')))
// String { "String object instance" }
console.info('** Number tests **')
console.log(clean(5))
// 5
console.log(clean(new Number(555)))
// Number { 555 }
console.log(clean(NaN))
// NaN
console.log(clean(new Number(NaN)))
// Number { NaN }
console.info('** Boolean tests **')
console.log(clean(false))
// false
console.log(clean(new Boolean(false)))
// Boolean { false }
console.info('** Object tests **')
// Every key except `hello` holds a blank value.
var test = { hello: 'h', '': '', 'null': null, 'undefined': undefined }
console.log('dirty', test)
// dirty Object { hello: "h", "": "", null: null, undefined: undefined }
console.log('cleaned', clean(test))
// cleaned Object { hello: "h" }
// The result is expected to be a new object, not the input.
console.log(clean(test) != test)
// true
console.log(clean(1) == 1)
// true
console.log(clean(null) == null)
// true
console.log(clean({}))
// Object {}
console.log(clean([]))
// Array []
// A nested object left with no keys after cleaning is expected to be dropped.
console.log(clean({ a: { b: '' }}))
// Object {}
console.info('** inheritance tests **')
var base = { type: 'base', status: 'not empty', example: 'example' }
// impl shadows `example` and `status` with blank own values; only `type` is
// still inherited from base with a non-blank value.
var impl = Object.create(base, {
example: {
enumerable: true,
value: null
},
status: {
enumerable: true,
value: undefined
}
})
console.log(clean(base))
// Object { type: "base", status: "not empty", example: "example" }
console.log(clean(impl))
// Object { type: "base" }
// The next two checks mutate impl in place, so their order matters.
impl.array = ['1']
console.log(clean(impl))
// Object { array: […], type: "base" }
impl.array = []
console.log(clean(impl))
// Object { type: "base" }
console.info('** weird tests **')
// A Promise has no enumerable properties; an empty object is expected.
console.log(clean(new Promise(function(res, rej) {})))
// Object { }
console.log(clean(function* generator() {}))
// function generator()
// String.raw produces a plain string primitive here.
console.log(clean(String.raw`${'template'} ${'literal'}!`))
// template literal!
// 28 November 2017
// Returns a copy of an object/array without blank entries.
// Iterates over `enumerable` properties, own and inherited.
// Alternative to using `delete` on an object's blank keys.
// This is the standard if-block version. See the terse operator logic version below.
// The console tests have been moved to a separate file.
/**
 * Returns a copy of an object/array without blank entries.
 *
 * Iterates over enumerable properties, own and inherited (for...in), and
 * keeps a value only when it is:
 *   - a number or boolean (including 0, NaN and false),
 *   - a non-empty string,
 *   - an array that is still non-empty after being cleaned, or
 *   - an object with own keys that still has own keys after being cleaned.
 * Everything else (null, undefined, functions, symbols, ...) is dropped.
 * Array results are compacted: dropped entries leave no holes.
 *
 * null, undefined, primitives, Math, RegExp and Date objects, and
 * Number/Boolean/String wrapper objects are returned unchanged.
 *
 * @param {*} source - value to clean.
 * @returns {*} a new cleaned object/array, or `source` itself when it is
 *   passed through unchanged.
 */
function clean(source) {
  if (source == null /* null or undefined */
    || typeof source != 'object' /* primitives */
  ) {
    return source;
  }

  // Identify built-ins by their internal type tag. Probing for property
  // names ('PI', 'match', 'multiline', 'toJSON') or comparing against
  // valueOf() misfires on plain objects that happen to own such keys, on
  // one-element arrays such as [''] or ['5'], and throws on objects
  // created with Object.create(null).
  const tag = Object.prototype.toString.call(source);

  if (/^\[object (Math|Number|Boolean|String|RegExp|Date)\]$/.test(tag)) {
    return source;
  }

  const isArray = Array.isArray(source);
  const result = isArray ? [] : {};

  for (const key in source) {
    // Assigning result['__proto__'] would replace the result's prototype
    // (e.g. for JSON.parse('{ "__proto__": {...} }')), so never copy it.
    if (key === '__proto__') {
      continue;
    }

    const value = source[key];
    let kept = null;

    if (typeof value === 'number' || typeof value === 'boolean') {
      kept = value;
    } else if (typeof value === 'string') {
      if (value.length) {
        kept = value;
      }
    } else if (Array.isArray(value)) {
      // use cleaned array if it has length
      if (value.length) {
        const cleaned = clean(value);

        if (cleaned.length) {
          kept = cleaned;
        }
      }
    } else if (value !== null && typeof value === 'object') {
      // use cleaned object if value has keys
      if (Object.keys(value).length) {
        const cleaned = clean(value);

        if (Object.keys(cleaned).length) {
          kept = cleaned;
        }
      }
    }

    if (kept === null) {
      continue;
    }

    if (isArray) {
      result.push(kept);
    } else {
      result[key] = kept;
    }
  }

  return result;
}
// 28 November 2017
// Returns a copy of an object/array without blank entries.
// Iterates over `enumerable` properties, own and inherited.
// Alternative to using `delete` on an object's blank keys.
// This is the anti-idiomatic terse version.
// The console tests have been moved to a separate file.
/**
 * Returns a copy of an object/array without blank entries (terse version).
 *
 * Iterates over enumerable properties, own and inherited (for...in). A value
 * is kept when it is a number, a boolean, a non-empty string, or a non-empty
 * array/object that is still non-empty after being cleaned. Everything else
 * (null, undefined, functions, symbols, ...) is dropped. Array results are
 * compacted.
 *
 * null, undefined, primitives, Math, RegExp and Date objects, and
 * Number/Boolean/String wrapper objects are returned unchanged.
 *
 * @param {*} source - value to clean.
 * @returns {*} a new cleaned object/array, or `source` itself when it is
 *   passed through unchanged.
 */
function clean(source) {
  const isObject = (value) => value !== null && typeof value === 'object';

  // Built-ins are recognised by type tag. Probing for 'PI', 'match',
  // 'multiline' or 'toJSON', or comparing against valueOf(), misfires on
  // plain objects owning those keys, on one-element arrays such as ['']
  // or ['5'], and throws on Object.create(null).
  const isBuiltIn = (value) =>
    /^\[object (Math|Number|Boolean|String|RegExp|Date)\]$/
      .test(Object.prototype.toString.call(value));

  if (!isObject(source) || isBuiltIn(source)) {
    return source;
  }

  // Number of entries: length for arrays, own enumerable keys for objects.
  const size = (value) =>
    Array.isArray(value) ? value.length : Object.keys(value).length;

  const nonEmpty = (value) => (size(value) ? value : null);

  // Maps a property value to what should be stored, or null to drop it.
  const keep = (value) =>
    typeof value === 'number' || typeof value === 'boolean' ? value
      : typeof value === 'string' ? (value.length ? value : null)
      : isObject(value) && size(value) ? nonEmpty(clean(value))
      : null;

  const isArray = Array.isArray(source);
  const result = isArray ? [] : {};

  for (const key in source) {
    // Never copy "__proto__": assigning it would replace the result's
    // prototype (e.g. for JSON.parse('{ "__proto__": {...} }')).
    const kept = key === '__proto__' ? null : keep(source[key]);

    kept !== null
      && (isArray ? result.push(kept) : (result[key] = kept));
  }

  return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment