Skip to content

Instantly share code, notes, and snippets.

@aylarov
Created June 30, 2017 16:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aylarov/dc8017f9935bfad75900e6681ffd7150 to your computer and use it in GitHub Desktop.
Save aylarov/dc8017f9935bfad75900e6681ffd7150 to your computer and use it in GitHub Desktop.
X-Wiki parser for VoxEngine
/**
* X-Wiki parser: reads a Wikipedia page and returns the fields of its infobox
* as a JSON-style object.
*/
/**
 * Builds a random string made of upper-case base-36 characters (0-9, A-Z).
 *
 * @param {number} [length=10] Requested number of characters. The absolute
 *     value is used; missing, non-numeric or infinite values fall back to 10.
 * @returns {string} Random string of `length` characters.
 * @throws {Error} When `length` is exactly 0.
 */
var generate = function(length) {
  // Validate before doing any work.
  if (length === 0) {
    throw new Error('Lenght need to be an integer different than 0.');
  }
  var size = Math.abs(length);
  if (!size || !isFinite(size)) {
    size = 10;
  }
  // A single Math.random() chunk has no guaranteed length (0.5 yields just
  // "I"), so keep appending chunks until enough characters are available.
  var output = '';
  while (output.length < size) {
    output += Math.random().toString(36).slice(2).toUpperCase();
  }
  return output.slice(0, size);
};
// Random placeholder token: WikiInfobox swaps it in for '|' characters that sit
// inside [[...]] / {{...}} spans before splitting the infobox on '|', and swaps
// it back to '|' in each extracted value.
var separator = generate();
/**
 * Fetches a Wikipedia page through the wiki's api.php endpoint and converts
 * the fields of its first infobox template into a plain object.
 *
 * Every field value is either a single descriptor or an array of descriptors:
 *   {type: 'text',  value: '...'}
 *   {type: 'link',  text: '...', url: '...'}
 *   {type: 'image', text: '...', url: '...'}
 *
 * Uses the `Net`, `Logger` and `separator` names from the enclosing scope.
 *
 * @param {string} page Title of the page to read.
 * @param {string} language Wikipedia sub-domain, e.g. 'en'.
 * @param {function} cb Invoked as cb(err) on failure (err is the HTTP status
 *     code for a non-200 response, otherwise an Error) or cb(null, fields).
 */
var WikiInfobox = function(page, language, cb) {
  // Upper bound on followed redirects so circular redirects terminate.
  var MAX_REDIRECTS = 5;
  var wikiURL = 'http://' + language +'.wikipedia.org/wiki/';

  // NOTE: all helpers are defined before the request is issued so they exist
  // even if the HTTP callback is ever invoked synchronously.

  // Length of the brace-balanced template at the start of `text`, or -1 when
  // the braces never balance.
  var findTemplateLength = function(text) {
    var depth = 0;
    for (var i = 0; i < text.length; i++) {
      var ch = text.charAt(i);
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i + 1;
        }
      }
    }
    return -1;
  };

  // Replaces every '|' nested inside [[...]] or {{...}} (at any depth) with
  // `separator`, so that only field-delimiting pipes remain.
  var protectNestedPipes = function(text) {
    var depth = 0;
    var pieces = [];
    var i = 0;
    while (i < text.length) {
      var pair = text.slice(i, i + 2);
      if (pair === '[[' || pair === '{{') {
        depth++;
        pieces.push(pair);
        i += 2;
      } else if (pair === ']]' || pair === '}}') {
        if (depth > 0) {
          depth--;
        }
        pieces.push(pair);
        i += 2;
      } else {
        var ch = text.charAt(i);
        pieces.push(ch === '|' && depth > 0 ? separator : ch);
        i += 1;
      }
    }
    return pieces.join('');
  };

  // True when `text` has visible characters and is not just optional
  // punctuation (. , :) followed by a single whitespace character.
  var isMeaningfulText = function(text) {
    return /\S/.test(text) && !/^\s*[\.\,\:]*\s$/.test(text);
  };

  // Turns the inside of a [[...]] link ("Target" or "Target|label") into a
  // link/image descriptor.
  var linkToObject = function(link) {
    var isImage = link.indexOf('File:') > -1 || link.indexOf('Image:') > -1;
    var parts = link.split('|');
    return {
      type: isImage ? 'image' : 'link',
      text: parts.length > 1 ? parts[1] : parts[0],
      url: wikiURL + parts[0]
    };
  };

  // Converts one raw field value into a descriptor or array of descriptors.
  var stringToObject = function(value) {
    var linkPattern = /\[\[(.*?)\]\]/g;
    var results = [];
    var cursor = 0;
    var match;
    // Walk the links by position rather than splitting on the link text, so a
    // link that occurs more than once in the value is handled correctly.
    while ((match = linkPattern.exec(value)) !== null) {
      var before = value.slice(cursor, match.index);
      if (isMeaningfulText(before)) {
        results.push({type: 'text', value: before});
      }
      results.push(linkToObject(match[1]));
      cursor = match.index + match[0].length;
    }
    var rest = value.slice(cursor);
    if (isMeaningfulText(rest)) {
      results.push({type: 'text', value: rest});
    }
    if (results.length === 0) {
      return {type:'text', value:value};
    }
    return results.length === 1 ? results[0] : results;
  };

  // Extracts the first infobox of `wikitext` as an object of parsed fields;
  // returns null when the text contains no infobox.
  var extractInfobox = function(wikitext) {
    var startMatch = wikitext.match(/\{\{\s*[Ii]nfobox/);
    if (!startMatch) {
      return null;
    }
    var start = startMatch.index;
    var length = findTemplateLength(wikitext.slice(start));
    // Drop the opening '{{' and, when the template is balanced, the closing
    // '}}' as well, so the last field's value does not end in '}}'.
    var body = length === -1 ?
      wikitext.slice(start + 2) :
      wikitext.slice(start + 2, start + length - 2);
    body = body.replace(/\n/g, ' ');

    var fields = protectNestedPipes(body).split('|');
    fields.shift(); // first chunk is the template name ("Infobox ...")

    var output = {};
    fields.forEach(function(field) {
      // Split on the FIRST '=' only, so values containing '=' stay intact.
      var eq = field.indexOf('=');
      if (eq === -1) {
        return; // no "key = value" shape; nothing to record
      }
      var key = field.slice(0, eq).trim();
      var value = field.slice(eq + 1).trim().split(separator).join('|');
      output[key] = stringToObject(value);
    });
    return output;
  };

  // Requests `title` and reports the parsed infobox through `cb`, following
  // at most `redirectsLeft` wiki redirects.
  var fetchPage = function(title, redirectsLeft) {
    var apiURL = 'http://'+ language + '.wikipedia.org/w/api.php?format' +
      '=json&action=query&prop=revisions&rvprop=content&titles=' +
      encodeURIComponent(title);
    Logger.write("Requested URL: " + apiURL);
    Net.httpRequest(apiURL, function(response) {
      if (Number(response.code) !== 200) {
        cb(response.code);
        return;
      }
      var wikitext;
      try {
        // JSON.parse is inside the try so malformed bodies reach cb as well.
        var pages = JSON.parse(response.text).query.pages;
        var pageId = Object.keys(pages)[0];
        wikitext = pages[pageId].revisions[0]['*'];
      } catch (err) {
        cb(err);
        return;
      }
      if (typeof wikitext !== 'string') {
        cb(new Error('No page content found!'));
        return;
      }

      // Only a leading "#REDIRECT [[Target]]" (any letter case) is treated as
      // a redirect; the keyword merely appearing in the text is not.
      var redirect = wikitext.match(/^\s*#redirect\s*:?\s*\[\[(.+?)\]\]/i);
      if (redirect) {
        // Strip any "|label" or "#section" suffix: neither is part of a title.
        var target = redirect[1].split('|')[0].split('#')[0].trim();
        if (target === '') {
          cb(new Error('Invalid redirect target!'));
          return;
        }
        if (redirectsLeft <= 0) {
          cb(new Error('Too many redirects!'));
          return;
        }
        fetchPage(target, redirectsLeft - 1);
        return;
      }

      var output = extractInfobox(wikitext);
      if (output === null) {
        cb(new Error('No infobox found!'));
        return;
      }
      cb(null, output);
    });
  };

  fetchPage(page, MAX_REDIRECTS);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment