Run this Node script against your exported JSON file; set the FILEPATH variable to the path of that file.
The keyfor function returns the relevant part of the URL, which is the part compared to detect duplicates. You can customize it based on the available URL properties.
// Node core modules: `fs` reads the export, `url` parses each item's href.
var fs = require('fs');
var url = require('url');

// Path to the exported JSON file to scan; edit this before running.
var FILEPATH = 'format_json.json';
/**
 * Builds the comparison key for a parsed URL: the hostname without a leading
 * "www.", followed by the pathname and the query string.
 *
 * Customize this to change what counts as "the same" URL. See
 * https://developer.mozilla.org/en/docs/Web/API/URL for available URL
 * properties.
 *
 * `pathname` is used rather than `path` because on objects returned by
 * Node's legacy `url.parse()` the `path` property already includes the query
 * string (appending `search` would duplicate it), and WHATWG `URL` objects
 * have no `path` property at all.
 *
 * @param {object} url - A parsed URL exposing `hostname`, `pathname` and
 *   `search` (a `url.parse()` result or a WHATWG `URL`).
 * @returns {string} The normalized key for the URL.
 */
function keyfor(url) {
  // hostname/search are null on legacy parse results when absent from the
  // input, so fall back to '' instead of throwing or appending "null".
  var host = (url.hostname || '').replace(/^www\./, '');
  return host + (url.pathname || '') + (url.search || '');
}
// Group every exported item under its normalized URL key.
// The index has no prototype so that a key can never collide with an
// inherited Object.prototype property (e.g. "__proto__").
var index = Object.create(null);

// The export is expected to be a top-level JSON array of items, each with
// an `href` string.
JSON.parse(
  fs.readFileSync(FILEPATH, 'utf8')
).forEach(function (item) {
  // Skip entries without a usable href; url.parse() throws on non-strings.
  if (!item || typeof item.href !== 'string') return;

  var key = keyfor(url.parse(item.href));
  if (!index[key]) index[key] = [];
  index[key].push(item);
});

// Keep only the keys shared by more than one item. Each group is rendered
// as its hrefs, one per line, and groups are separated by a blank line.
var duplicates = Object.keys(index).map(function (key) {
  return index[key];
}).filter(function (list) {
  return list.length > 1;
}).map(function (list) {
  return list.map(function (item) {
    return item.href;
  }).join('\n');
}).join('\n\n');

console.log(duplicates);