Skip to content

Instantly share code, notes, and snippets.

@larsgw
Created January 1, 2019 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save larsgw/f775ae993110a43b306da4b8a4e7cac3 to your computer and use it in GitHub Desktop.
Save larsgw/f775ae993110a43b306da4b8a4e7cac3 to your computer and use it in GitHub Desktop.
Initial test to create Wikidata <-> CSL mapping
{
"Q571": "book",
"Q922203": "book",
"Q19705": "book",
"Q191067": "article",
"Q59908": "article",
"Q216665": "book",
"Q190399": "pamphlet",
"Q327611": "pamphlet",
"Q1173065": "book",
"Q488053": "book",
"Q580922": "article",
"Q628080": "book",
"Q759838": "article",
"Q254554": "book",
"Q862334": "book",
"Q220393": "book",
"Q934552": "book",
"Q914881": "book",
"Q950683": "book",
"Q1164267": "book",
"Q1503133": "book",
"Q918038": "book",
"Q1535505": "book",
"Q10389811": "entry",
"Q1580166": "entry",
"Q277759": "book",
"Q1667921": "book",
"Q1809676": "article",
"Q1923776": "book",
"Q2045717": "pamphlet",
"Q2106255": "article",
"Q2333573": "book",
"Q10870555": "report",
"Q1004391": "report",
"Q2438528": "report",
"Q337055": "book",
"Q2461013": "book",
"Q727715": "book",
"Q2933856": "book",
"Q1414013": "book",
"Q2939758": "book",
"Q128093": "book",
"Q3268307": "book",
"Q3357101": "book",
"Q3719255": "article",
"Q4955683": "book",
"Q5707594": "article-newspaper",
"Q6646525": "article",
"Q6960620": "book",
"Q7318358": "article",
"Q7515656": "book",
"Q7582241": "article",
"Q9295878": "article-newspaper",
"Q12041885": "book",
"Q13442814": "article-journal",
"Q13593966": "book",
"Q13583784": "book",
"Q14406742": "book",
"Q14523803": "book",
"Q17134316": "book",
"Q17123524": "book",
"Q17329259": "entry-encyclopedia",
"Q17518557": "article",
"Q17710980": "book",
"Q17710986": "book",
"Q19196418": "pamphlet",
"Q19389637": "entry",
"Q19917774": "article",
"Q20540385": "book",
"Q7433672": "book",
"Q193495": "book",
"Q22961568": "book",
"Q25110279": "book",
"Q15416": "broadcast",
"Q26260507": "broadcast",
"Q26742063": "book",
"Q30008669": "book",
"Q30009376": "book",
"Q30070590": "article-magazine",
"Q1238720": "book",
"Q38647918": "book",
"Q42350535": "article",
"Q50380591": "book",
"Q52269333": "book",
"Q53843792": "book",
"Q56330488": "book",
"Q3331189": "book",
"Q155171": "book",
"Q83790": "book",
"Q255135": "book",
"Q1496857": "book",
"Q471894": "book",
"Q586744": "book",
"Q680458": "book",
"Q856058": "book",
"Q956165": "book",
"Q836925": "report",
"Q1249682": "book",
"Q1279564": "book",
"Q1650727": "book",
"Q1516252": "book",
"Q1569753": "book",
"Q1670252": "book",
"Q1962297": "book",
"Q1926270": "report",
"Q2338167": "book",
"Q2368988": "book",
"Q2307704": "report",
"Q2309880": "report",
"Q2564985": "book",
"Q2568454": "book",
"Q2677586": "report",
"Q2782326": "report",
"Q3000100": "report",
"Q3099732": "report",
"Q4343952": "report",
"Q1760610": "book",
"Q4804740": "book",
"Q4690599": "report",
"Q5276074": "book",
"Q5394558": "book",
"Q6549529": "book",
"Q7918438": "report",
"Q15629444": "report",
"Q16968990": "book",
"Q17090395": "report",
"Q18059293": "book",
"Q18417520": "book",
"Q8275050": "book",
"Q19364663": "book",
"Q19355445": "report",
"Q20741385": "book",
"Q21009694": "book",
"Q21112633": "book",
"Q1883939": "book",
"Q23888763": "book",
"Q24634210": "song",
"Q25203386": "book",
"Q26225677": "broadcast",
"Q27027169": "report",
"Q39811647": "book",
"Q41274869": "report",
"Q42793760": "book",
"Q47123453": "report",
"Q47126552": "report",
"Q50965914": "book",
"Q55155149": "book",
"Q55442722": "book",
"Q55442818": "book",
"Q55771109": "book",
"Q55936923": "book",
"Q55936932": "book",
"Q55937502": "book",
"Q56028349": "book",
"Q56123235": "book",
"Q56013707": "report",
"Q56697520": "book",
"Q56753859": "book",
"Q57652900": "book",
"Q106833": "book",
"Q203490": "book",
"Q448980": "book",
"Q604219": "book",
"Q605076": "book",
"Q642946": "book",
"Q747381": "book",
"Q855753": "book",
"Q890239": "book",
"Q913554": "book",
"Q43013": "book",
"Q1041638": "book",
"Q944359": "book",
"Q1009641": "book",
"Q1027825": "book",
"Q1062404": "book",
"Q1106827": "book",
"Q1184488": "book",
"Q1528894": "book",
"Q1346592": "book",
"Q1616547": "book",
"Q1785330": "book",
"Q1977520": "book",
"Q1986787": "book",
"Q2104296": "book",
"Q2122442": "book",
"Q2122677": "book",
"Q2135225": "book",
"Q2208044": "book",
"Q2314679": "book",
"Q2331348": "book",
"Q2363145": "book",
"Q2374324": "book",
"Q2396513": "book",
"Q2514954": "book",
"Q2537127": "book",
"Q2787237": "book",
"Q2831984": "book",
"Q3045706": "book",
"Q3831821": "book",
"Q3915339": "book",
"Q4224691": "book",
"Q4515179": "book",
"Q4677625": "book",
"Q4686085": "book",
"Q5073531": "book",
"Q5159310": "book",
"Q6675210": "book",
"Q10666342": "book",
"Q11396303": "book",
"Q11750596": "book",
"Q12308638": "book",
"Q13137339": "book",
"Q13430107": "book",
"Q13636757": "book",
"Q13751595": "book",
"Q16046027": "book",
"Q16385949": "book",
"Q16736578": "book",
"Q20110471": "book",
"Q21598767": "book",
"Q21662746": "book",
"Q22988237": "book",
"Q25679217": "book",
"Q26267321": "book",
"Q26271823": "book",
"Q27560760": "book",
"Q29154430": "book",
"Q29586870": "book",
"Q31946409": "book",
"Q38143661": "book",
"Q52005090": "book",
"Q52153485": "book",
"Q57790812": "book",
"Q830689": "report",
"Q1555508": "broadcast",
"Q10885494": "article-journal",
"Q23927052": "paper-conference",
"Q187044": "article-newspaper",
"Q309481": "article-newspaper",
"Q267628": "article-newspaper",
"Q608971": "book",
"Q725377": "book",
"Q815410": "book",
"Q871232": "article-newspaper",
"Q1508646": "pamphlet",
"Q1497584": "book",
"Q1504425": "article-journal",
"Q193934": "book",
"Q193955": "book",
"Q1400059": "book",
"Q1991869": "book",
"Q1784036": "book",
"Q1813223": "book",
"Q2495037": "article-newspaper",
"Q2602337": "article-newspaper",
"Q990683": "book",
"Q2732056": "book",
"Q2326951": "book",
"Q2500820": "book",
"Q2774197": "article-journal",
"Q3257212": "book",
"Q4363806": "book",
"Q6548306": "book",
"Q7316896": "article-journal",
"Q10916116": "book",
"Q12183006": "article-journal",
"Q15706459": "article-journal",
"Q17628188": "article-newspaper",
"Q17633526": "article-newspaper",
"Q17586363": "book",
"Q18918145": "article-journal",
"Q19357149": "book",
"Q20088085": "entry",
"Q20088089": "entry",
"Q20136634": "article",
"Q20043999": "book",
"Q21198407": "book",
"Q21875313": "book",
"Q43290228": "article",
"Q56478376": "article-journal",
"Q1684600": "broadcast",
"Q19375673": "article",
"Q336181": "broadcast",
"Q1484397": "broadcast",
"Q1541065": "report",
"Q2025786": "book",
"Q2250844": "book",
"Q2308891": "report",
"Q2635894": "broadcast",
"Q2665960": "report",
"Q3237931": "broadcast",
"Q3956369": "broadcast",
"Q4034405": "book",
"Q6912943": "broadcast",
"Q8034663": "book",
"Q14623351": "broadcast",
"Q15961983": "broadcast",
"Q17148351": "book",
"Q18030695": "report",
"Q18311760": "broadcast",
"Q19776345": "broadcast",
"Q5398426": "broadcast",
"Q21190411": "broadcast",
"Q47512784": "report",
"Q56240541": "broadcast",
"Q58884": "broadcast",
"Q11504513": "broadcast",
"Q14942329": "broadcast",
"Q15836186": "broadcast",
"Q16068806": "broadcast",
"Q16206641": "broadcast",
"Q17145545": "broadcast",
"Q18640746": "broadcast",
"Q19845560": "broadcast",
"Q19973797": "broadcast",
"Q21191270": "broadcast",
"Q21217315": "broadcast",
"Q25090976": "broadcast",
"Q27912070": "broadcast",
"Q34682961": "broadcast",
"Q46706005": "broadcast",
"Q622812": "broadcast",
"Q240862": "broadcast",
"Q1187667": "broadcast",
"Q86860": "book",
"Q1358344": "broadcast",
"Q374466": "book",
"Q1249224": "report",
"Q914242": "broadcast",
"Q1742009": "broadcast",
"Q1011299": "song",
"Q1962634": "broadcast",
"Q1857766": "broadcast",
"Q2125867": "broadcast",
"Q2231383": "broadcast",
"Q3209941": "report",
"Q2681385": "book",
"Q3276244": "song",
"Q2915491": "book",
"Q5338721": "broadcast",
"Q11078958": "report",
"Q13632557": "book",
"Q19359000": "report",
"Q24633474": "song",
"Q26225765": "broadcast",
"Q28135032": "song",
"Q28136925": "song",
"Q39814262": "book",
"Q50823049": "report",
"Q50966803": "book",
"Q50966833": "book",
"Q55936401": "broadcast",
"Q182415": "broadcast",
"Q356055": "broadcast",
"Q431102": "broadcast",
"Q506240": "broadcast",
"Q661436": "broadcast",
"Q854995": "broadcast",
"Q986699": "broadcast",
"Q1261214": "broadcast",
"Q1366112": "broadcast",
"Q1407240": "broadcast",
"Q1407245": "broadcast",
"Q1472288": "broadcast",
"Q1619206": "broadcast",
"Q1819008": "broadcast",
"Q1924371": "broadcast",
"Q2081003": "broadcast",
"Q2304946": "broadcast",
"Q3464665": "broadcast",
"Q3744532": "broadcast",
"Q5428822": "broadcast",
"Q5455086": "broadcast",
"Q6626746": "broadcast",
"Q7697093": "broadcast",
"Q10676514": "broadcast",
"Q581714": "broadcast",
"Q11086742": "broadcast",
"Q276": "broadcast",
"Q288608": "broadcast",
"Q7603925": "broadcast",
"Q338632": "broadcast",
"Q3421644": "broadcast",
"Q2983424": "broadcast",
"Q5465514": "broadcast",
"Q5812300": "broadcast",
"Q17928402": "post-blog",
"Q21232614": "broadcast",
"Q23739": "broadcast",
"Q23745": "broadcast",
"Q124922": "broadcast",
"Q186286": "broadcast",
"Q178840": "broadcast",
"Q21191019": "broadcast",
"Q399811": "broadcast",
"Q482612": "broadcast",
"Q653916": "broadcast",
"Q526877": "broadcast",
"Q775344": "broadcast",
"Q662197": "broadcast",
"Q940462": "broadcast",
"Q1054760": "broadcast",
"Q1259759": "broadcast",
"Q1273568": "broadcast",
"Q1658957": "broadcast",
"Q1786567": "broadcast",
"Q1676730": "broadcast",
"Q3511312": "broadcast",
"Q1711400": "broadcast",
"Q1802588": "broadcast",
"Q1799894": "broadcast",
"Q2388283": "broadcast",
"Q3546572": "broadcast",
"Q3951815": "broadcast",
"Q4382232": "broadcast",
"Q4453959": "broadcast",
"Q4783297": "broadcast",
"Q5219865": "broadcast",
"Q5366501": "broadcast",
"Q5778915": "broadcast",
"Q5287435": "broadcast",
"Q6645282": "broadcast",
"Q7185299": "broadcast",
"Q7135559": "broadcast",
"Q7731786": "broadcast",
"Q7724161": "broadcast",
"Q7864671": "broadcast",
"Q9335577": "broadcast",
"Q9671105": "broadcast",
"Q11086745": "broadcast",
"Q13359539": "broadcast",
"Q20061443": "broadcast",
"Q20220309": "broadcast",
"Q20267837": "broadcast",
"Q20986817": "broadcast",
"Q22812458": "broadcast",
"Q170238": "broadcast",
"Q21188110": "broadcast",
"Q21191068": "broadcast",
"Q21233490": "broadcast",
"Q21191265": "broadcast",
"Q21664088": "broadcast",
"Q23368955": "broadcast",
"Q24886171": "broadcast",
"Q25360500": "broadcast",
"Q26644852": "broadcast",
"Q27868077": "broadcast",
"Q28698514": "broadcast",
"Q29555881": "broadcast",
"Q30939244": "broadcast",
"Q39032834": "broadcast",
"Q44097961": "broadcast",
"Q50062923": "broadcast",
"Q50914552": "broadcast",
"Q55082620": "broadcast",
"Q55848868": "broadcast",
"Q29197": "broadcast",
"Q3252662": "broadcast",
"Q60520": "book",
"Q707372": "book",
"Q2005755": "book",
"Q2538131": "book",
"Q2933082": "book",
"Q3196335": "book",
"Q3297186": "book",
"Q4700148": "book",
"Q7248423": "pamphlet",
"Q21190961": "book",
"Q21191134": "book",
"Q21198342": "book",
"Q54294944": "book",
"Q3956596": "book",
"Q27070652": "book",
"Q1352815": "broadcast",
"Q2933978": "broadcast",
"Q2981450": "book",
"Q2921195": "book",
"Q6888313": "book",
"Q7321644": "book",
"Q7890265": "book",
"Q21292860": "broadcast",
"Q24067746": "post-blog",
"Q26225493": "book",
"Q41436524": "book",
"Q47214765": "broadcast",
"Q56119332": "post-blog",
"Q623703": "book",
"Q655192": "book",
"Q742157": "article-newspaper",
"Q914229": "article-newspaper",
"Q1081564": "book",
"Q2069352": "book",
"Q2678443": "book",
"Q3434123": "book",
"Q3536411": "book",
"Q3740879": "book",
"Q3997226": "book",
"Q4903141": "book",
"Q4903147": "book",
"Q4903144": "book",
"Q4903161": "book",
"Q4903165": "book",
"Q4903168": "book",
"Q4903174": "book",
"Q4903175": "book",
"Q4903176": "book",
"Q4903181": "book",
"Q4903184": "book",
"Q4903185": "book",
"Q4903188": "book",
"Q4903210": "book",
"Q4903213": "book",
"Q5465451": "article-newspaper",
"Q6009879": "book",
"Q7999883": "article-newspaper",
"Q12270042": "book",
"Q15097084": "book",
"Q15982056": "article-newspaper",
"Q25110971": "book",
"Q25473994": "book",
"Q29581299": "book",
"Q45182324": "article-journal",
"Q54820068": "book",
"Q6888651": "book",
"Q3080071": "broadcast",
"Q6022825": "broadcast",
"Q15977715": "broadcast",
"Q24906243": "broadcast",
"Q34487266": "broadcast",
"Q220898": "broadcast",
"Q4922471": "broadcast",
"Q1192644": "broadcast",
"Q1193356": "broadcast",
"Q1193877": "broadcast",
"Q1193889": "broadcast",
"Q1198546": "broadcast",
"Q1200102": "broadcast",
"Q1200891": "broadcast",
"Q1203502": "broadcast",
"Q1328971": "broadcast",
"Q1003021": "book",
"Q1741854": "broadcast",
"Q2321734": "broadcast",
"Q2558761": "broadcast",
"Q4765080": "broadcast",
"Q11396323": "broadcast",
"Q12242979": "broadcast",
"Q15548228": "broadcast",
"Q16247289": "broadcast",
"Q18611586": "broadcast",
"Q21504449": "broadcast",
"Q21629439": "broadcast",
"Q27965091": "broadcast",
"Q27965088": "broadcast",
"Q27965089": "broadcast",
"Q27986339": "broadcast",
"Q28225717": "broadcast",
"Q29982285": "broadcast",
"Q47011432": "broadcast",
"Q54874833": "book",
"Q54932319": "broadcast",
"Q3071014": "broadcast",
"Q43265747": "broadcast",
"Q55422400": "broadcast",
"Q21759196": "broadcast",
"Q43082648": "broadcast",
"Q918098": "broadcast",
"Q17113138": "broadcast"
}
var wdk = require('wikidata-sdk')
require('isomorphic-fetch')
// SPARQL text sent to the Wikidata endpoint. It selects (?item, ?parent) pairs
// where ?item reaches wd:Q732577 through one or more wdt:P279 hops, ?parent is
// a direct wdt:P279 target of ?item, and ?parent itself reaches wd:Q732577
// through zero or more wdt:P279 hops.
const query = [
  'SELECT DISTINCT ?item ?parent WHERE {',
  '?item wdt:P279+ wd:Q732577 .',
  '?item wdt:P279 ?parent .',
  '?parent wdt:P279* wd:Q732577 .',
  '}'
].join('\n')
// Hand-picked seed table: Wikidata item id -> CSL type. add() consults this
// table first and copies matching entries into `mappings`.
// Written grouped by CSL type; the reduce below flattens it into a plain
// { itemId: cslType } object, inserting ids in the order they are listed.
const sourceMappings = [
  ['article', ['Q49848', 'Q191067']],
  ['article-journal', ['Q13442814', 'Q18918145']],
  ['article-newspaper', ['Q38926', 'Q5707594']],
  ['article-magazine', ['Q30070590']],
  ['bill', ['Q686822']],
  ['book', ['Q3331189', 'Q571']],
  ['broadcast', ['Q1555508', 'Q15416']],
  ['chapter', ['Q1980247']],
  ['dataset', ['Q1172284']],
  ['entry', ['Q10389811', 'Q19389637']],
  ['entry-encyclopedia', ['Q17329259']],
  ['figure', ['Q30070753']],
  ['graphic', ['Q1027879', 'Q4502142', 'Q478798', 'Q838948']],
  ['interview', ['Q178651']],
  ['legislation', ['Q49371', 'Q820655']],
  ['legal_case', ['Q2334719']],
  ['manuscript', ['Q87167']],
  ['map', ['Q4006']],
  ['motion_picture', ['Q11424', 'Q30070675']],
  ['musical_score', ['Q187947']],
  ['pamphlet', ['Q18536349', 'Q190399']],
  ['paper-conference', ['Q26973022', 'Q23927052']],
  ['patent', ['Q253623']],
  ['personal_communication', ['Q30070565', 'Q30070439', 'Q133492', 'Q628523']],
  ['post', ['Q7216866']],
  ['post-blog', ['Q17928402']],
  ['report', ['Q10870555']],
  ['review', ['Q265158']],
  ['review-book', ['Q637866']],
  ['song', ['Q7366', 'Q3741908', 'Q30070318', 'Q24634210']],
  ['speech', ['Q861911']],
  ['thesis', ['Q1266946', 'Q187685']],
  ['treaty', ['Q131569']],
  ['webpage', ['Q36774']]
].reduce((table, [cslType, itemIds]) => {
  itemIds.forEach(itemId => {
    table[itemId] = cslType
  })
  return table
}, {})
// Accumulators written by add() while it walks the graph.
// Resolved item id -> CSL type; this is the object printed at the end.
const mappings = {}
// Item ids for which add() found no CSL type on the way up.
const noMappings = []
// Item id -> array of CSL types; add() writes here when an item already has a
// mapping by the time its parent has been resolved.
const multiMappings = {}
function add (item, graph) {
if (item in sourceMappings) {
return mappings[item] = sourceMappings[item]
} else if (item in mappings) {
return mappings[item]
} else if (graph[item]) {
var parent = add(graph[item], graph)
if (mappings[item]) {
if (!multiMappings[item])
multiMappings[item] = [mappings[item]]
multiMappings[item].push(parent)
return mappings[item]
} else if (parent) {
return mappings[item] = parent
} else if (!noMappings.includes(item)) {
noMappings.push(item)
return mappings[item]
}
} else {
// Q732577
}
}
// Run the SPARQL query, fold the rows into an item -> parent lookup, resolve
// every item through add(), then print the collected mappings as JSON.
fetch(wdk.sparqlQuery(query))
  .then(response => {
    // fetch() only rejects on network failure; surface HTTP errors explicitly
    // instead of trying to parse an error page as JSON.
    if (!response.ok) {
      throw new Error(`SPARQL request failed: ${response.status} ${response.statusText}`)
    }
    return response.json()
  })
  .then(wdk.simplify.sparqlResults)
  .then(rows => rows.reduce((graph, {item, parent}) => {
    // NOTE(review): only one parent is kept per item; when the query returns
    // several rows for the same item, the last row wins.
    graph[item] = parent
    return graph
  }, {}))
  .then(graph => {
    Object.keys(graph).forEach(id => add(id, graph))
    console.log(JSON.stringify(mappings, null, 2))
  })
  .catch(error => {
    // Report the failure and exit non-zero rather than leaving an unhandled
    // promise rejection.
    console.error(error)
    process.exitCode = 1
  })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment