Created
October 26, 2022 07:27
-
-
Save companje/94ccc77ada64696cb750070824e15c52 to your computer and use it in GitHub Desktop.
Load, link, and filter data from a triple store in JavaScript.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Request a URL with HTTP GET and return its body parsed as JSON.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} Resolves with the parsed JSON body.
 * @throws {Error} (as a rejection) when the request itself fails, when no
 *   response object is produced, or when the response reports a non-OK
 *   status. A failure of `response.json()` propagates unchanged.
 */
async function _fetch(url) {
  let rawResponse;
  try {
    rawResponse = await fetch(url, { method: "get" });
  } catch (err) {
    // Keep the underlying failure reachable through `cause` instead of
    // logging it and rejecting with `undefined`.
    throw new Error(`ERROR: Could not retrieve data from ${url}`, { cause: err });
  }

  if (!rawResponse) {
    throw new Error(`ERROR: Could not retrieve data from ${url}`);
  }
  if (!rawResponse.ok) {
    // Without this check an HTTP error payload would be parsed and handed
    // to the caller as if it were real data.
    throw new Error(
      `ERROR: Could not retrieve data from ${url} (HTTP ${rawResponse.status})`
    );
  }

  return rawResponse.json();
}
/**
 * Start one request per result page of a paginated query.
 *
 * Pages are numbered from 1 up to and including `numPages`; the page number
 * is added to the URL as a `page` query parameter.
 *
 * @param {string} queryUrl - Query URL, with or without an existing query string.
 * @param {number} numPages - Number of pages to request.
 * @returns {Promise<*>[]} One pending `_fetch` promise per page, in page order.
 */
function _retrieveFromTripleStore(queryUrl, numPages) {
  // TODO: Dynamic check (keep requesting pages until one comes back empty)
  // Use "?" when the URL has no query string yet, so the page parameter is
  // always well-formed; URLs that already carry parameters still get "&".
  const separator = String(queryUrl).includes("?") ? "&" : "?";
  const promises = [];
  for (let pageIdx = 1; pageIdx <= numPages; pageIdx++) {
    promises.push(_fetch(`${queryUrl}${separator}page=${pageIdx}`));
  }
  return promises;
}
/**
 * Download every dataset and store the rows on `data`.
 *
 * After the returned promise resolves, `data` has these array properties:
 * `sources`, `documents`, `addresses`, `persons`, `addressesPerDocument`
 * and `personsPerAddressPerDocument`. Each array holds the rows of all pages
 * of that dataset, concatenated in page order. All arrays are reset to `[]`
 * up front and are only filled once every page of every dataset has arrived,
 * so after a failure they are left empty rather than partially filled.
 *
 * @param {object} data - Object that receives the dataset arrays (mutated).
 * @returns {Promise<Array>} Resolves with the raw page responses of all
 *   datasets, grouped per dataset in the order listed above. Rejects as soon
 *   as any page fails to load.
 */
async function loadAllData(data) {
  const BASE_URL = "https://api.data.netwerkdigitaalerfgoed.nl/queries/hetutrechtsarchief/";
  const POSTFIX = "/run?pageSize=10000";

  // One entry per dataset, listed in the order in which their pages appear
  // in the returned array.
  // TODO: page counts are hard-coded; see the TODO in _retrieveFromTripleStore.
  const datasets = [
    { key: "sources", query: "wo2-brontypes", numPages: 1 },
    { key: "documents", query: "wo2-documenten", numPages: 2 },
    { key: "addresses", query: "wo2-adressen", numPages: 2 },
    { key: "persons", query: "wo2-personen", numPages: 2 },
    { key: "addressesPerDocument", query: "wo2-adressen-per-document", numPages: 2 },
    { key: "personsPerAddressPerDocument", query: "wo2-persoon-op-adres-per-document", numPages: 2 },
  ];

  for (const { key } of datasets) {
    data[key] = [];
  }

  // All requests are started before anything is awaited, so they run in
  // parallel. Awaiting through Promise.all also means a failed page surfaces
  // as a single rejection instead of an unhandled one.
  const pagesPerDataset = await Promise.all(
    datasets.map(({ query, numPages }) =>
      Promise.all(_retrieveFromTripleStore(BASE_URL + query + POSTFIX, numPages))
    )
  );

  // Concatenate in page order (not arrival order) to keep the result
  // deterministic; empty/falsy page responses are skipped.
  datasets.forEach(({ key }, datasetIdx) => {
    for (const page of pagesPerDataset[datasetIdx]) {
      if (page) {
        data[key] = data[key].concat(page);
      }
    }
  });

  // Flatten only the per-dataset grouping; each page response stays intact.
  return pagesPerDataset.flat();
}
/**
 * Turn the flat datasets into a cross-linked object graph.
 *
 * The row objects inside `data` are reused and extended in place:
 *  - each document gets `sourceItem`, and (when linked) `addresses` and
 *    `personAtAddressItems` (`{ person, address }` pairs);
 *  - each address gets (when linked) `documents` and `persons`;
 *  - each source gets `documents` and `addresses`.
 *
 * Link rows that refer to an id that is not present in the lookup tables are
 * skipped and reported once with `console.warn`.
 *
 * @param {object} data - Object with the arrays `sources`, `documents`,
 *   `addresses`, `persons`, `addressesPerDocument` and
 *   `personsPerAddressPerDocument`.
 * @returns {object} Object with the lookup tables `sourcesById`,
 *   `documentsById`, `addressesById`, `personsById` and the matching arrays
 *   `sources`, `documents`, `addresses`, `persons`.
 */
function linkData(data) {
  // Builds an id -> item table; for a duplicated id the last item wins.
  // Object.fromEntries avoids spreading one argument per row into a call.
  const indexBy = (items, idKey) =>
    Object.fromEntries(items.map((item) => [item[idKey], item]));

  const result = {};

  //create lookup tables
  result.sourcesById = indexBy(data.sources, "sourceId");
  result.documentsById = indexBy(data.documents, "docId");
  result.addressesById = indexBy(data.addresses, "addressId");
  result.personsById = indexBy(data.persons, "personId");

  //create arrays (de-duplicated by id through the lookup tables)
  result.sources = Object.values(result.sourcesById);
  result.documents = Object.values(result.documentsById);
  result.addresses = Object.values(result.addressesById);
  result.persons = Object.values(result.personsById);

  //link every document to its source on key 'sourceId'
  for (const doc of result.documents) {
    doc.sourceItem = result.sourcesById[doc.sourceId];
  }

  let skippedLinks = 0;

  //link addresses to documents (and vice versa)
  for (const { docId, addressId } of data.addressesPerDocument) {
    const doc = result.documentsById[docId];
    const address = result.addressesById[addressId];
    if (!doc || !address) {
      skippedLinks += 1;
      continue;
    }
    //addresses mentioned on this document
    if (!doc.addresses) doc.addresses = [];
    doc.addresses.push(address);
    //documents that mention this address
    if (!address.documents) address.documents = [];
    address.documents.push(doc);
  }

  //persons per address per document
  for (const { docId, personId, addressId } of data.personsPerAddressPerDocument) {
    const doc = result.documentsById[docId];
    const person = result.personsById[personId];
    const address = result.addressesById[addressId];
    if (!doc || !person || !address) {
      skippedLinks += 1;
      continue;
    }
    //persons associated with an address on this particular document
    if (!doc.personAtAddressItems) doc.personAtAddressItems = [];
    doc.personAtAddressItems.push({ person, address });
    //persons associated with this address on any document
    if (!address.persons) address.persons = [];
    address.persons.push(person);
  }

  if (skippedLinks > 0) {
    console.warn("skipped link rows with unknown ids:", skippedLinks);
  }

  //store documents per sourceType (single pass, keeps document order)
  const documentsBySource = new Map(result.sources.map((source) => [source, []]));
  for (const doc of result.documents) {
    documentsBySource.get(doc.sourceItem)?.push(doc);
  }
  for (const source of result.sources) {
    source.documents = documentsBySource.get(source);
  }

  //store addresses per sourceType (keeps the order of result.addresses)
  for (const source of result.sources) {
    const linkedAddresses = new Set();
    for (const doc of source.documents) {
      for (const address of doc.addresses ?? []) {
        linkedAddresses.add(address);
      }
    }
    source.addresses = result.addresses.filter((address) => linkedAddresses.has(address));
  }

  console.log("all addresses (unique):", result.addresses.length);
  console.log("all documents:", result.documents.length);
  console.log("all persons (mentions):", result.persons.length);
  console.log("all sources:", result.sources.length);
  return result;
}
/**
 * Select the addresses that belong to the chosen sources and, optionally,
 * match a search term.
 *
 * @param {object} data - Linked data; `data.addresses` and `data.sources` are read.
 * @param {string} searchTerm - Case-insensitive text to look for. When empty
 *   or missing, no text filtering is applied (and `searchOption` is ignored).
 * @param {string} searchOption - `"persons"` (match labels of
 *   `address.persons`), `"addresses"` (match the address label) or `"all"`
 *   (either of the two).
 * @param {Array} [selectedSources] - Sources to restrict to; defaults to all
 *   of `data.sources`. Missing (`undefined`/`null`) entries are ignored.
 * @returns {Array} The matching addresses, in the order of `data.addresses`.
 * @throws {Error} When a search term is given with an unknown `searchOption`.
 */
function filterData(data, searchTerm, searchOption, selectedSources) {
  //default = all sources
  const allSourcesSelected = !selectedSources || selectedSources === data.sources;
  // Drop missing entries so a caller asking for e.g. sources[2] when only two
  // sources exist does not crash the search.
  const sources = (selectedSources || data.sources).filter(Boolean);

  console.log(`searching for '${searchTerm}' in '${searchOption}' of '${sources.map((o) => " " + o.label)}'`);

  let filteredData;
  if (allSourcesSelected) {
    //shortcut: every address qualifies when all sources are selected
    filteredData = [...data.addresses];
  } else {
    // Collect the addresses of the selected sources once; a source without an
    // `addresses` list contributes nothing (it must not match everything).
    const allowedAddresses = new Set();
    for (const source of sources) {
      for (const address of source.addresses ?? []) {
        allowedAddresses.add(address);
      }
    }
    filteredData = data.addresses.filter((address) => allowedAddresses.has(address));
  }

  if (!searchTerm) return filteredData;

  const needle = String(searchTerm).toLowerCase();
  const labelMatches = (item) =>
    typeof item?.label === "string" && item.label.toLowerCase().includes(needle);
  const anyPersonMatches = (address) => (address.persons ?? []).some(labelMatches);

  switch (searchOption) {
    //filter by all: either persons or addresses
    case "all":
      return filteredData.filter(
        (address) => anyPersonMatches(address) || labelMatches(address)
      );
    //filter by person name
    case "persons":
      return filteredData.filter(anyPersonMatches);
    //filter by address
    case "addresses":
      return filteredData.filter(labelMatches);
    default:
      throw new Error("Unknown searchOption: " + searchOption);
  }
}
let linkedData; //global to use in console

/**
 * Demo entry point: load all data, link it, and run one example search.
 *
 * The linked data is stored in the module-level `linkedData` so it can be
 * inspected from the console afterwards. Any error raised while loading,
 * linking or filtering is logged rather than propagated, so the returned
 * promise always resolves.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const data = {};
  try {
    console.log("loading");
    await loadAllData(data);

    console.log("linking");
    linkedData = linkData(data);

    const searchTerm = "willem";
    const searchOption = "all"; //persons, addresses or all
    // Take at most the first three sources; slicing (instead of indexing
    // 0..2) never yields undefined entries when fewer sources exist.
    const searchSources = linkedData.sources.slice(0, 3);
    console.log(linkedData.sources);

    console.log("filtering");
    const filteredData = filterData(linkedData, searchTerm, searchOption, searchSources);
    console.log("filteredData", filteredData);
    console.log('example: filterData(linkedData, "willem", "all", [linkedData.sources[1]])');
  } catch (err) {
    console.error("error", err);
  }
}

console.log('%c Hallo Simon!!!', 'font-weight: bold; font-size: 50px;color: red; text-shadow: 3px 3px 0 rgb(217,31,38) , 6px 6px 0 rgb(226,91,14) , 9px 9px 0 rgb(245,221,8) , 12px 12px 0 rgb(5,148,68) , 15px 15px 0 rgb(2,135,206) , 18px 18px 0 rgb(4,77,145) , 21px 21px 0 rgb(42,21,113)');
main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment