Last active
October 15, 2023 22:26
-
-
Save MarketingPip/363891ba9253268d14d80a4a25096aed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**!
* @license WTF-Link-QIDifier Plugin - an extension for the wtf_wikipedia library. It enhances the functionality of wtf_wikipedia by adding a custom method to the models.Doc class, allowing you to enrich Wikipedia document links with corresponding Wikidata QIDs.
* VERSION: 1.0.0
* CREATED BY: JARED VAN VALKENGOED
* DO NOT REMOVE THIS NOTICE
*/
import wtf from "https://esm.sh/wtf_wikipedia"; | |
/**
 * Collapses every run of two or more whitespace characters into one space.
 * A lone whitespace character (including a single tab or newline) is kept.
 * @param {string} str - The text to normalise.
 * @returns {string} A copy of `str` in which each such run is a single space.
 */
function removeDoubleSpaces(str) {
  const whitespaceRun = /\s{2,}/g;
  const singleSpace = ' ';
  return str.replace(whitespaceRun, singleSpace);
}
/**
 * Attaches Wikidata QIDs to Wikipedia link objects.
 *
 * Each link is shallow-copied; the inputs are never mutated. A copy gains a
 * `QID` property when an entry of `qidList` has a `name` equal to the link's
 * `page`. When several entries share a name, the first one wins.
 *
 * @param {object[]} objects - Link objects; each is matched on its `page` property.
 * @param {object[]} qidList - Lookup entries with `name` and `QID` properties.
 * @returns {object[]} Copies of `objects`, in the same order, with `QID` added where matched.
 */
function mapQIDsToObjects(objects, qidList) {
  // Index the QIDs once so each link is resolved with a single lookup
  // instead of rescanning qidList for every link.
  const qidByName = new Map();
  for (const { name, QID } of qidList) {
    // Keep the first entry for a name.
    if (!qidByName.has(name)) {
      qidByName.set(name, QID);
    }
  }

  return objects.map((obj) => {
    const enriched = { ...obj };
    if (qidByName.has(obj.page)) {
      enriched.QID = qidByName.get(obj.page);
    }
    return enriched;
  });
}
// WTF-Link-QIDifier
wtf.extend((models) => {
  const endpointUrl = 'https://query.wikidata.org/sparql';

  /**
   * Converts a SPARQL JSON response into name/QID pairs.
   * @param {object} results - The parsed SPARQL response; its `results.bindings` array is read.
   * @returns {object[]} An array of objects with `name` and `QID` properties.
   */
  const mapQIDToName = (results) =>
    results.results.bindings.map((binding) => {
      const itemUri = binding.item.value;
      return {
        name: binding.title.value,
        // The QID is whatever follows the last "/" of the item URI.
        QID: itemUri.substring(itemUri.lastIndexOf("/") + 1),
      };
    });

  /**
   * Looks up the Wikidata QID for every link in the document with a single
   * SPARQL request, and stores the enriched links on `this.WikiDataLinks`.
   *
   * @returns {Promise<object[]|null>} The enriched link objects, or `null`
   *   when the document is a disambiguation page.
   * @throws {Error} When the SPARQL endpoint answers with a non-2xx status.
   */
  models.Doc.prototype.addQIDsToLinks = async function () {
    // Ensure the custom property exists
    this.WikiDataLinks = this.WikiDataLinks || {};

    // Ignore fetching results / QIDs for disambiguation pages
    if (this.isDisambiguation() === true) {
      this.WikiDataLinks = null;
      return null;
    }

    const links = this.links().map((link) => link.json());
    const langCode = this._options.lang || "en";

    // Build one language-tagged literal per distinct page title,
    // e.g. "Toronto"@en. The Set drops duplicates and keeps insertion order.
    const titleLiterals = new Set();
    for (const { page } of links) {
      if (page !== undefined) {
        titleLiterals.add(`${JSON.stringify(page)}@${langCode}`);
      }
    }

    // Nothing to look up: skip the network round-trip entirely.
    if (titleLiterals.size === 0) {
      this.WikiDataLinks = mapQIDsToObjects(links, []);
      return this.WikiDataLinks;
    }

    // Construct a SPARQL query to fetch data from Wikidata
    const sparqlQuery = `SELECT ?title ?item WHERE {
VALUES ?title {${[...titleLiterals].join('\n')}}
?sitelink schema:about ?item;
schema:isPartOf <https://${langCode}.wikipedia.org/>;
schema:name ?title.
}`;

    // POST the query so a long VALUES list is sent in the request body
    // rather than in the URL.
    const response = await fetch(endpointUrl, {
      method: "POST",
      headers: {
        "Accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      },
      body: new URLSearchParams({
        query: sparqlQuery,
      }),
    });

    // Surface HTTP failures with their status instead of attempting to
    // parse the error body as JSON.
    if (!response.ok) {
      throw new Error(
        `Wikidata SPARQL query failed: ${response.status} ${response.statusText}`
      );
    }

    // Map QIDs to objects and add them to the document
    this.WikiDataLinks = mapQIDsToObjects(links, mapQIDToName(await response.json()));
    return this.WikiDataLinks;
  };
});
// Example of plugin usage:
const doc = await wtf.fetch("Toronto");
// addQIDsToLinks() resolves with the enriched links and also stores them on the document.
await doc.addQIDsToLinks();
// Once it has resolved, the same links can be read back from the document:
console.log(doc.WikiDataLinks);
hey, yeah this is cool. I like how you can batch the request for all the links.
Maybe it should be part of the api plugin?
I was thinking it could be part of the official API, or split out as a plugin (if it comes to that, I think it should be a separate plugin). I just wanted your insights! PS: I was hoping to connect and maybe watch your dev process to see if I can learn a bit (plus I could then share my random thoughts there, or on Discord, etc.), as I assume you will want to package this with your standard bundle process, CDNs, etc.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@spencermountain - check this over and see if it is worthy of being an official plugin! (It might need your touch-ups / love, though!) Hoping we can add this to the main repo.
PS: hoping we can call or chat via FB / iMessage, etc.! Let me know!