Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Last active December 12, 2015 03:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hoehrmann/4710160 to your computer and use it in GitHub Desktop.
Save hoehrmann/4710160 to your computer and use it in GitHub Desktop.
<?php
#
# Ad-hoc oblique service for ngram databases with compatible interface
#
# The reply is served as plain text.
header('Content-Type: text/plain;charset=UtF-8');

# Both parameters are mandatory and must be plain strings. A request such as
# "?q[]=x" makes PHP deliver an array, which passes isset() but cannot be used
# by the regex and string handling further down, so it is rejected here with
# the same message as a missing parameter.
if (!isset($_REQUEST['q'], $_REQUEST['nick'])
    || !is_string($_REQUEST['q'])
    || !is_string($_REQUEST['nick'])) {
    echo "I need a q! I need a nick! Sonst functioneren ik niet.";
    exit();
}
# Lookup table from the language tags accepted as ":tag" options to the numeric
# corpus identifiers passed on in the "corpus" request parameter.
# The "-2009" tags presumably select the 2009 editions of the corpora
# (TODO confirm); note there is no "it-2009" entry.
$lang2code = [
    "en-us"      => 17,
    "en-gb"      => 18,
    "en"         => 15,
    "zh"         => 23,
    "fr"         => 19,
    "de"         => 20,
    "he"         => 24,
    "it"         => 22,
    "ru"         => 25,
    "es"         => 21,
    "en-us-2009" => 5,
    "en-gb-2009" => 6,
    "zh-2009"    => 11,
    "en-2009"    => 0,
    "fr-2009"    => 7,
    "de-2009"    => 8,
    "he-2009"    => 9,
    "ru-2009"    => 12,
    "es-2009"    => 10,
];
# Parse the ":option" tokens of the request into $corpus, $from_year, $to_year.
#
# Only the leading run of ":token" words counts as options. This is the same
# prefix the script strips from q to obtain the search phrase, so a colon
# inside the phrase itself (e.g. "ratio 3:1") is not mistaken for an option.
#
# Recognised options:
#   :<language tag>  selects the corpus through $lang2code (case-insensitive);
#                    if several are given, the last one wins
#   :<number>        a year bound; the first two are used, further ones ignored
# Any other token is ignored.
$option_text = "";
if (preg_match('/^(?:\s*:\S+\s*)*/', $_REQUEST['q'], $prefix) === 1) {
    $option_text = $prefix[0];
}
preg_match_all('/:(\S+)/', $option_text, $mq);

$corpus = 0;
$years = [];
foreach ($mq[1] as $token) {
    $tag = strtolower($token);
    if (array_key_exists($tag, $lang2code)) {
        $corpus = $lang2code[$tag];
    } elseif (is_numeric($token) && count($years) < 2) {
        $years[] = $token;
    }
    # TODO: unrecognised options are silently ignored.
}

# As before, the first year given is taken as the end of the range and the
# second as its start; missing bounds fall back to 1800 and 2000.
# (On PHP 7+ the idiomatic spelling of these defaults is the ?? operator.)
$to_year = isset($years[0]) ? $years[0] : "2000";
$from_year = isset($years[1]) ? $years[1] : "1800";

# Make the range run forwards whatever order the years were typed in, so that
# ":1900 :2000" and ":2000 :1900" both ask for 1900-2000.
if ((float) $from_year > (float) $to_year) {
    $swap = $from_year;
    $from_year = $to_year;
    $to_year = $swap;
}
# The search phrase is q with its leading ":option" tokens removed.
$query = preg_replace("/^(\s*:(\S+)\s*)*/", "", $_REQUEST['q']);

# Nothing left to look up: print the sad face and stop.
if (strlen($query) === 0) {
    exit(":(");
}
# Fetch the ngram graph page for the phrase, year range and corpus.
# http_build_query() applies the same urlencode()-style escaping to every
# value that the hand-built URL used; the separator is passed explicitly so
# the arg_separator.output ini setting cannot change the URL.
$fetch = "http://books.example.com/ngrams/graph?" . http_build_query(
    [
        "content"    => $query,
        "year_start" => $from_year,
        "year_end"   => $to_year,
        "corpus"     => $corpus,
        "smoothing"  => 3,
    ],
    "",
    "&"
);

$ch = curl_init($fetch);
if ($ch === false) {
    printf(":(");
    exit();
}
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, 0);
# Bound the time spent waiting on the remote service so a stalled upstream
# cannot hang this request indefinitely.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
$data = curl_exec($ch);
curl_close($ch);

# curl_exec() returns false on transport errors and timeouts; answer with the
# same sad face used for every other failure instead of parsing a non-string.
if ($data === false) {
    printf(":(");
    exit();
}
# Extract the chart data embedded in the fetched page.
$page = is_string($data) ? $data : "";

# $mcol[1] collects the label of every data.addColumn('number', '<label>') call.
preg_match_all("/data\\.addColumn\\('number',\\s*'(.*?)'\\)/", $page, $mcol);

# $mjson[1] is the argument text of the first data.addRows(...); call.
preg_match("/data\\.addRows\\((.*?)\\);/s", $page, $mjson);
if (empty($mjson[1])) {
    printf(":(");
    exit();
}

# The rows must decode to a non-empty array whose last entry is itself an
# array, because the reply is built from that last row. Anything else (invalid
# JSON, an object, a bare scalar) is reported as a failure rather than left to
# crash later on.
$decoded = json_decode($mjson[1]);
if (!is_array($decoded) || !$decoded || !is_array(end($decoded))) {
    printf(":(");
    exit();
}
# Build the one-line reply "<nick>: [label] value ..." from the last data row.
$result = $_REQUEST['nick'] . ": ";
$last_row = end($decoded);

# Every cell after the first is scaled by the largest of those cells.
# max() throws on an empty array in PHP 8, so a row holding only its first
# cell falls back to a divisor of 1; a maximum of 0 does the same to avoid
# dividing by zero.
$series = array_slice(array_values($last_row), 1);
$max = $series ? max($series) : 1;
$max = $max ? $max : 1;

foreach ($last_row as $k => $v) {
    # A cell without a matching column label gets an empty label instead of
    # raising an undefined-offset warning.
    $label = isset($mcol[1][$k]) ? $mcol[1][$k] : "";
    if ($k > 0) {
        # NOTE(review): the number shown with a "%" sign is $v / $max, a
        # fraction of the row maximum that is not multiplied by 100 --
        # confirm this is the intended presentation.
        $result .= sprintf("[%s] %.03f%% ", $label, $v / $max);
    } else {
        # The first cell is printed as-is (presumably the year -- TODO confirm).
        $result .= sprintf("[%s] %s ", $label, $v);
    }
}
print($result);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment