Instantly share code, notes, and snippets.
Created
December 23, 2011 10:27
-
Save hippietrail/1513820 to your computer and use it in GitHub Desktop.
Tool to analyse IATA airport code tags on travel.stackexchange.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<title>Travel StackExchange / jQuery / Stack Exchange API / YQL / CORS</title> | |
<script id="jqs" type="text/javascript" src="http://code.jquery.com/jquery-latest.min.js"></script> | |
<style type="text/css"> | |
.container { | |
width:100%; | |
} | |
.left { | |
width:75%; | |
float:left; | |
} | |
.right { | |
width:25%; | |
float:right; | |
} | |
.iata { | |
color:blue | |
} | |
.tagwiki { | |
color:red | |
} | |
.warn { | |
background-color:yellow | |
} | |
.err { | |
background-color:red | |
} | |
tt { | |
font-weight:bold | |
} | |
</style> | |
<script type="text/javascript"> | |
// TODO We don't know when everything is finished
// TODO Need to track and match all queries with successes/failures

// Global jQuery AJAX defaults. Uncomment (and tune) the timeout below to
// exercise the error-handling paths of the requests in this file.
$.ajaxSetup({
    //timeout: 780
});

// Page-wide counters, updated from the AJAX callbacks and mirrored into the
// statistics <span>s of the form.
var outstanding_ajax = 0,    // requests sent whose completion handler has not run yet
    good_excerpt_total = 0,  // tag wiki excerpts that contain "air"
    no_excerpt_total = 0,    // codes for which no tag wiki came back
    bad_excerpt_total = 0,   // tag wiki excerpts that do not contain "air"
    iata_total = 0,          // codes the airport lookup returned a record for
    bad_iata_total = 0;      // codes the airport lookup returned nothing for
// Click handler body for the "Check ..." button.
//
// Disables the form, asks the Stack Exchange API how many tags exist in
// total (pagesize 0, so no tag data is transferred), shows that total and
// then hands over to get_all_sx_3letter_tags() to fetch every page.
//
// If the initial request fails, or comes back without a usable total, the
// problem is logged to #debug and the form is re-enabled so the user can
// retry; previously the button stayed disabled forever in that case.
function do_stackexchange() {
    var maxpagesize = 100;
    var form_inputs = $("#fourm input");

    // Logs the failure and unlocks the form again.
    function give_up(textStatus) {
        $("<div/>").attr("class", "err").text("sx info < ERROR st: " + textStatus).prependTo("#debug");
        form_inputs.prop("disabled", false);
    }

    form_inputs.prop("disabled", true);

    // TODO include this in the loop instead of being an extra call before the loop
    $.getJSON("http://api.travel.stackexchange.com/1.1/tags?jsonp=?",
        { pagesize: 0 },
        function(data) {
            var total = data ? Number(data.total) : NaN;
            if (!data || data.error || isNaN(total)) {
                give_up("badresponse");
                return;
            }
            // Number of pages needed to cover `total` tags, rounding up.
            var pagecount = Math.ceil(total / maxpagesize);
            $("#tag-total").text(data.total);
            get_all_sx_3letter_tags(maxpagesize, pagecount);
        }
    )
    // .fail() replaces the deprecated jqXHR.error()
    .fail(function(jqXHR, textStatus, errorThrown) {
        give_up(textStatus);
    });
}
// request a batch of airport codes - asynchronous
//
// Looks up every code in `codes` through a single YQL query and annotates
// the matching <li id="CODE"> elements with the airport name and location
// (or with a warning when no record came back for that code). When the
// <li> already holds a tag wiki excerpt, the Levenshtein distances between
// the excerpt and the airport data are appended as well.
//
//   page  - page number the codes came from (currently unused)
//   codes - array of lower-case 3-letter tag names
//
// Results are matched to codes by their `code` field rather than by
// position, so the outcome does not depend on the order of the response
// or on every record being present and well-formed.
function do_airportcodes(page, codes) {
    // YQL supports CORS (but MSIE and Opera do not)
    var params = $.param({
        // TODO ap: codes,
        q: "select * from json where url in ("
            + "select url from uritemplate where template=\""
            + "http://airportcode.riobard.com/airport/{airport}?fmt=JSON"
            + "\" and airport in ("
            + $.map(codes, function(v){ return "\"" + v + "\""; }).join(",")
            + "))",
        format: "json",
        diagnostics: "true"
    });

    // TODO "http://query.yahooapis.com/v1/public/yql/peter/airports"
    var yql_url = "http://query.yahooapis.com/v1/public/yql";
    // if no CORS support then fall back to JSONP
    if (!$.support.cors) {
        yql_url += "?callback=?";
    }

    // Marks every code of this batch as failed and logs the reason.
    function report_failure(textStatus) {
        $.each(codes, function(i, code){
            $("<div/>").attr("class", "err").text("ac timeout").appendTo("#" + code);
        });
        $("<div/>").attr("class", "err").text("air < ERROR st: " + textStatus + " : " + codes).prependTo("#debug");
    }

    $("<div/>").text(++outstanding_ajax + " outstanding ajax (air)").prependTo("#debug");
    $("#os-ajax").text(outstanding_ajax);

    $.getJSON(yql_url, params,
        function(data){
            var query = data && data.query;
            if (!query) {
                report_failure("badresponse");
                return;
            }

            // YQL JSON seems to be converted from XML so varies depending on how many
            // results were returned: nothing, a single object, or an array. Normalize to an array.
            var raw = query.results ? query.results.json : null;
            var airports;
            if (raw == null) {
                airports = [];
            } else if ($.isArray(raw)) {
                airports = raw;
            } else {
                airports = [raw];
            }

            // Index the records by lower-cased code; records without a code are ignored.
            var by_code = {};
            $.each(airports, function(i, airport){
                if (airport && typeof airport.code === "string") {
                    by_code[airport.code.toLowerCase()] = airport;
                }
            });

            $.each(codes, function(i, code){
                var li = $("#" + code);

                if (!by_code.hasOwnProperty(code)) {
                    $("<div/>").attr("class", "iata warn").text("Invalid IATA airport code").appendTo(li);
                    $("#bad-iata-total").text(++bad_iata_total);
                    return;
                }

                var airport = by_code[code];
                // Missing fields become empty strings so levenshtein() always gets strings.
                var name = airport.name == null ? "" : String(airport.name);
                var loc = airport.location == null ? "" : String(airport.location);

                // Check for the excerpt before appending, then compare only if it was there.
                var docompare = li.has("div.tagwiki").length > 0;

                $("<div/>").attr("class", "iata").text(name).appendTo(li);
                $("<div/>").attr("class", "iata").text(loc).appendTo(li);
                $("#iata-total").text(++iata_total);

                if (docompare) {
                    var both = name + " " + loc;
                    var tw = li.children("div.tagwiki").text();
                    $("<div/>").text("name distance: " + levenshtein(tw, name) + " (wiki->iata)").appendTo(li);
                    $("<div/>").text(" loc distance: " + levenshtein(tw, loc) + " (wiki->iata)").appendTo(li);
                    $("<div/>").text("both distance: " + levenshtein(tw, both) + " (wiki->iata)").appendTo(li);
                }
            });
        }
    )
    // .fail()/.always() replace the deprecated jqXHR.error()/.complete()
    .fail(function(jqXHR, textStatus, errorThrown) {
        report_failure(textStatus);
    })
    .always(function() {
        $("<div/>").text(--outstanding_ajax + " outstanding ajax (air)").prependTo("#debug");
        $("#os-ajax").text(outstanding_ajax);
    });
}
// request a batch of tag wikis - asynchronous
//
// Fetches the tag wiki excerpts for every code in `codes` with one Stack
// Exchange API call and annotates the matching <li id="CODE"> elements.
// An excerpt that does not contain "air" is flagged as a warning. When the
// <li> already holds airport data, the Levenshtein distances between the
// excerpt and that data are appended as well.
//
//   page  - page number the codes came from (currently unused)
//   codes - array of lower-case 3-letter tag names
//
// A code counts as having "no tag wiki excerpt" both when the API returned
// no entry for it and when the entry has no excerpt string; the latter
// used to throw and abort processing of the whole batch.
function do_tagwikis(page, codes) {
    // Marks every code of this batch as failed and logs the reason.
    function report_failure(textStatus) {
        $.each(codes, function(i, code){
            $("<div/>").attr("class", "err").text("tw timeout").appendTo("#" + code);
        });
        $("<div/>").attr("class", "err").text("tw < ERROR st: " + textStatus).prependTo("#debug");
    }

    // Stack Exchange doesn't support CORS, use JSONP
    $("<div/>").text(++outstanding_ajax + " outstanding ajax (tw)").prependTo("#debug");
    $("#os-ajax").text(outstanding_ajax);

    $.getJSON("http://api.travel.stackexchange.com/1.1/tags/" + codes.join(";") + "/wikis?jsonp=?",
        // success callback
        function(data){
            if (!data || !$.isArray(data.tag_wikis)) {
                report_failure("badresponse");
                return;
            }

            // tag names for which a usable excerpt was received
            var has_excerpt = {};

            $.each(data.tag_wikis, function(i, ele){
                var excerpt = ele.wiki_excerpt;
                if (typeof excerpt !== "string") {
                    // wiki exists but has no excerpt: reported with the missing ones below
                    return;
                }
                has_excerpt[ele.tag_name] = true;

                var classes = "tagwiki";
                if (excerpt.toLowerCase().indexOf("air") == -1) {
                    classes += " warn";
                    $("#bad-excerpt-total").text(++bad_excerpt_total);
                } else {
                    $("#excerpt-total").text(++good_excerpt_total);
                }

                var li = $("#" + ele.tag_name);
                // Check for airport data before appending, then compare only if it was there.
                var docompare = li.has("div.iata").length > 0;

                $("<div/>").attr("class", classes).text(excerpt).appendTo(li);

                if (docompare) {
                    // the first div.iata holds the name, the second the location
                    var name = li.children("div.iata").eq(0).text();
                    var loc = li.children("div.iata").eq(1).text();
                    var both = name + " " + loc;
                    $("<div/>").text("name distance: " + levenshtein(excerpt, name) + " (iata->wiki)").appendTo(li);
                    $("<div/>").text(" loc distance: " + levenshtein(excerpt, loc) + " (iata->wiki)").appendTo(li);
                    $("<div/>").text("both distance: " + levenshtein(excerpt, both) + " (iata->wiki)").appendTo(li);
                }
            });

            // every requested code without a usable excerpt gets a warning
            $.each(codes, function(i, code) {
                if (!has_excerpt.hasOwnProperty(code)) {
                    $("<div/>").attr("class", "tagwiki warn").text("There's no tag wiki excerpt for this code").appendTo("#" + code);
                    $("#no-excerpt-total").text(++no_excerpt_total);
                }
            });
        }
    )
    // .fail()/.always() replace the deprecated jqXHR.error()/.complete()
    .fail(function(jqXHR, textStatus, errorThrown) {
        report_failure(textStatus);
    })
    .always(function() {
        $("<div/>").text(--outstanding_ajax + " outstanding ajax (tw)").prependTo("#debug");
        $("#os-ajax").text(outstanding_ajax);
    });
}
/////////////////////////////////////////////////////////////////////////////////////
// Helper function for get_all_sx_3letter_tags()
//
// Returns an AJAX failure callback bound to one page number, so the message
// it prepends to #debug names the page whose request failed.
function tagPageFailed(page) {
    function onFailure(jqXHR, textStatus, errorThrown) {
        var message = "sx page > p" + page + " ERROR st: " + textStatus;
        var entry = $("<div/>");
        entry.attr("class", "err");
        entry.text(message);
        entry.prependTo("#debug");
    }
    return onFailure;
}
// Requests every page of the site's tag list and, for each tag whose name
// is exactly three lower-case letters, adds an <li> to #output (kept in
// alphabetical order) and starts the airport and tag wiki lookups for that
// page's batch of codes.
//
//   maxpagesize - number of tags requested per page
//   pagecount   - number of pages to request (pages 1..pagecount)
//
// All page requests are started at once; their callbacks run later, in
// whatever order the responses arrive. Each request therefore gets its own
// function scope, so its callbacks report their own page number rather than
// the final value of the loop counter.
function get_all_sx_3letter_tags(maxpagesize, pagecount) {
    // 3-letter tags seen so far, across all pages
    var three_tag_count = 0;

    // Inserts the <li> for `tag` into #output, keeping the list sorted by id.
    function insert_tag_li(tag) {
        var items = $("#output").children("li");
        var low = 0;
        var high = items.length;

        // binary search for the first item whose id sorts after the new name
        while (low < high) {
            var mid = Math.floor((low + high) / 2);
            if (tag.name < items.eq(mid).attr("id")) {
                high = mid;
            } else {
                low = mid + 1;
            }
        }

        // Build the element with DOM methods so API data is never parsed as HTML.
        var li = $("<li/>").attr("id", tag.name)
            .append($("<tt/>").text(tag.name.toUpperCase()))
            .append($("<sup/>").text(String(tag.count)));

        if (low < items.length) {
            li.insertBefore(items.eq(low));
        } else {
            li.appendTo("#output");
        }
    }

    // Sends the request for one page and wires up its callbacks.
    function request_page(page) {
        $("<div/>").text(++outstanding_ajax + " outstanding ajax (sx)").prependTo("#debug");
        $("#os-ajax").text(outstanding_ajax);

        // Stack Exchange doesn't support CORS, use JSONP
        $.getJSON("http://api.travel.stackexchange.com/1.1/tags?jsonp=?",
            {
                page: page,
                pagesize: maxpagesize
            },
            function(data) {
                if (!data || data.error || !$.isArray(data.tags)) {
                    $("<div/>").text("sx < ? success data error").prependTo("#debug");
                    return;
                }

                var codes = [];
                $.each(data.tags, function(i, tag){
                    if (!/^[a-z][a-z][a-z]$/.test(tag.name)) {
                        return;
                    }
                    three_tag_count++;
                    $("#3l-tag-total").text(three_tag_count);
                    insert_tag_li(tag);
                    codes.push(tag.name);
                });

                $("<div/>").text(codes.length + " 3-letter tags in page " + page).prependTo("#debug");
                if (codes.length == 0) {
                    $("<div/>").text("no 3-letter tags in page " + page).prependTo("#debug");
                } else {
                    do_airportcodes(data.page, codes);
                    do_tagwikis(data.page, codes);
                }
            }
        )
        // .fail()/.always() replace the deprecated jqXHR.error()/.complete()
        .fail(tagPageFailed(page))
        .always(function() {
            $("<div/>").text(--outstanding_ajax + " outstanding ajax (sx)").prependTo("#debug");
            $("#os-ajax").text(outstanding_ajax);
        });
    }

    for (var p = 1; p <= pagecount; p++) {
        request_page(p);
    }
    $("<div/>").text("all ajax sent (sx)").prependTo("#debug");
}
// Page initialisation, run once the DOM is ready: report CORS support in the
// debug column, hook up the button and finally unlock the form.
$(document).ready(function(){
    // initialize stuff
    var cors_note = $("<div/>");
    if (!$.support.cors) {
        cors_note.attr("class", "err").text("browser does not support CORS");
    } else {
        cors_note.text("browser supports CORS");
    }
    cors_note.prependTo("#debug");

    // add functions to the gui buttons
    var check_button = $("#stackexchange");
    check_button.click(function(){ do_stackexchange() });

    // enable the form controls now that init is done
    var form_inputs = $("#fourm input");
    form_inputs.prop("disabled", false);
});
function levenshtein (s1, s2) {
    // Calculate Levenshtein distance between two strings
    //
    // version: 1109.2015
    // discuss at: http://phpjs.org/functions/levenshtein
    // + original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com)
    // + bugfixed by: Onno Marsman
    // + revised by: Andrea Giammarchi (http://webreflection.blogspot.com)
    // + reimplemented by: Brett Zamir (http://brett-zamir.me)
    // + reimplemented by: Alexander M Beedie
    // * example 1: levenshtein('Kevin van Zonneveld', 'Kevin van Sommeveld');
    // * returns 1: 3
    //
    // Returns the minimum number of single-character insertions, deletions
    // and substitutions needed to turn s1 into s2.
    //
    // Characters are compared as whole Unicode code points: a surrogate pair
    // counts as one character, not two. null/undefined are treated as the
    // empty string and other non-string values are converted with String().

    // Splits a value into an array of characters, keeping each surrogate
    // pair together as one element.
    function to_chars(value) {
        var str = (value == null) ? "" : String(value);
        var chars = [];
        var len = str.length;
        for (var i = 0; i < len; i++) {
            var unit = str.charCodeAt(i);
            // high surrogate followed by a low surrogate: one code point
            if (unit >= 0xD800 && unit <= 0xDBFF && i + 1 < len) {
                var next = str.charCodeAt(i + 1);
                if (next >= 0xDC00 && next <= 0xDFFF) {
                    chars.push(str.charAt(i) + str.charAt(i + 1));
                    i++;
                    continue;
                }
            }
            chars.push(str.charAt(i));
        }
        return chars;
    }

    var a = to_chars(s1);
    var b = to_chars(s2);
    var a_len = a.length;
    var b_len = b.length;

    if (a_len === 0) {
        return b_len;
    }
    if (b_len === 0) {
        return a_len;
    }
    if (a.join("") === b.join("")) {
        return 0;
    }

    // Two-row dynamic programming: `prev` holds the distances for the
    // previous character of b, `curr` is filled in for the current one.
    var prev = new Array(a_len + 1);
    var curr = new Array(a_len + 1);
    var a_idx;
    var b_idx;

    for (a_idx = 0; a_idx <= a_len; a_idx++) {
        prev[a_idx] = a_idx;
    }

    for (b_idx = 1; b_idx <= b_len; b_idx++) {
        curr[0] = b_idx;
        var char_b = b[b_idx - 1];

        for (a_idx = 0; a_idx < a_len; a_idx++) {
            var cost = (a[a_idx] === char_b) ? 0 : 1;
            var best = prev[a_idx + 1] + 1;       // skip a character of b
            var via_left = curr[a_idx] + 1;       // skip a character of a
            var via_diag = prev[a_idx] + cost;    // match or substitute
            if (via_left < best) {
                best = via_left;
            }
            if (via_diag < best) {
                best = via_diag;
            }
            curr[a_idx + 1] = best;
        }

        // swap rows; the old `prev` is overwritten in the next round
        var swap = prev;
        prev = curr;
        curr = swap;
    }

    return prev[a_len];
}
</script> | |
</head> | |
<body> | |
Travel Stack Exchange airport code tag verification tool | |
<hr> | |
<form id="fourm"> | |
<input type="button" disabled="true" id="stackexchange" value="Check Travel Stack Exchange 3-letter tags"></input> | |
Total number of tags: <span id="tag-total">?</span>. | |
<!-- number of tags scanned --> | |
Number of 3-letter tags: <span id="3l-tag-total">?</span> | |
Number with good wiki excerpt: <span id="excerpt-total">?</span> | |
Number with bad wiki excerpt: <span id="bad-excerpt-total">?</span> | |
Number with no wiki excerpt: <span id="no-excerpt-total">?</span> | |
Number of valid IATA airport codes: <span id="iata-total">?</span> | |
Number of invalid IATA airport codes: <span id="bad-iata-total">?</span> | |
(Outstanding AJAX requests: <span id="os-ajax">?</span>) | |
</form> | |
<hr> | |
<div class="container"> | |
<div class="left"> | |
Output<hr> | |
<ol id="output"></ol>
</div> | |
<div class="right"> | |
Debugging<hr> | |
<div id="debug"></div> | |
</div> | |
</div> | |
</body> | |
</html> |
ankurdotb
commented
Jan 12, 2012
- Correctly displays tag wikis, but in overall stats at the top does not show how many have tag wikis.
- In overall statistics, does not show how many are valid IATA codes. Three-letter tags which are not IATA codes are all tagged correctly.
Yeah dealing with loops of asynchronous things is quite painful as it happens. I might implement deferred batches using jQuery or I might just leave it for a redesign now that I've learned a lot with this proof of concept.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment