Skip to content

Instantly share code, notes, and snippets.

@DrDub
Last active December 16, 2015 12:09
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save DrDub/5432860 to your computer and use it in GitHub Desktop.
The one-file-wonder behind http://tikiqa.duboue.net.
<?php
// This file assumes the augmented and interesting files are in ~/augmented and ~/interesting, respectively, and that a clone of Elastica is available in ~/Elastica, where ~ is /home/tikiqa.
/**
 * Class autoloader for the Elastica checkout under /home/tikiqa/Elastica/lib.
 *
 * Maps a fully-qualified class name to a file by turning namespace
 * separators into directory separators (Elastica\Query\QueryString ->
 * Elastica/Query/QueryString.php). Classes whose file does not exist are
 * silently skipped so that other autoloaders (if any) get a chance.
 *
 * Registered through spl_autoload_register() because the magic __autoload()
 * function was deprecated in PHP 7.2 and removed in PHP 8.0.
 */
spl_autoload_register(function ($class) {
    $file = '/home/tikiqa/Elastica/lib/' . str_replace('\\', '/', $class) . '.php';
    if (file_exists($file)) {
        require_once $file;
    }
});
// Which request parameters were supplied (presence only, the value may be empty).
$has_query  = array_key_exists('query', $_GET);
$has_thread = array_key_exists('thread', $_GET);

// "raw" mode returns the search response as JSON instead of the HTML page;
// it is meaningless without a query, so bail out early in that case.
$raw = array_key_exists('raw', $_GET) ? 1 : 0;
if ($raw === 1 && !$has_query) {
    die("must specify a query.");
}

// Run the query-string search against the 'tiki' index; the result set is
// consumed further down by both the raw and the HTML output paths.
if ($has_query) {
    $elasticaClient = new \Elastica\Client();

    $elasticaQueryString = new \Elastica\Query\QueryString();
    $elasticaQueryString->setQuery($_GET['query']);

    $elasticaQuery = new \Elastica\Query();
    $elasticaQuery->setQuery($elasticaQueryString);

    $elasticaIndex     = $elasticaClient->getIndex('tiki');
    $elasticaResultSet = $elasticaIndex->search($elasticaQuery);
}
?>
<?php
// Raw mode: emit the search response data as JSON and nothing else.
// Otherwise fall through to the HTML page in the else-branch below.
if ($raw) {
    // Label the payload as JSON so clients do not treat it as text/html.
    // Guarded because header() only works before any output is sent.
    if (!headers_sent()) {
        header('Content-Type: application/json');
    }
    print json_encode($elasticaResultSet->getResponse()->getData());
} else {
?>
<html>
<head>
<title>Tiki IRC QA Mining Demo</title>
</head>
<body>
<h1>Mining Question/Answer pairs from #tikiwiki history</h1>
See <a href="https://dev.tiki.org/IRC+QA+Mining">https://dev.tiki.org/IRC+QA+Mining</a> for details.
<hr>
<form method="GET">
Query for questions mined from <a href="http://irc.tiki.org/irclogger_logs/tikiwiki">#tikiwiki IRC logs</a>: <br/>
<textarea name="query" rows=10 cols=80>
<?php
// Re-populate the textarea with the submitted query. The value comes straight
// from the request, so it must be HTML-escaped; otherwise a query such as
// "</textarea><script>..." would be injected into the page (reflected XSS).
if ($has_query && is_string($_GET['query'])) {
    echo htmlspecialchars($_GET['query'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
?>
</textarea>
<br/>
<input type="submit">
</form>
<?php
// Renders the area below the search form: either one complete thread
// (?thread=N) or the ordered list of hits for the current query.
//
// Everything printed here that originates from the IRC logs or the search
// index is HTML-escaped. NOTE(review): this assumes the augmented .txt files
// and the indexed 'question' field hold plain text, not intentional markup.
$escape = function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

if ($has_thread) {
    echo '<hr>';

    // Sanitize: only the first run of digits in the parameter is used to
    // build the file name, so the path cannot escape the augmented directory.
    // A non-string parameter (e.g. thread[]=1) or one without digits is
    // treated as an unknown thread.
    $lines = false;
    if (is_string($_GET['thread']) && preg_match('/[0-9]+/', $_GET['thread'], $matches) === 1) {
        $file = '/home/tikiqa/augmented/' . $matches[0] . '.txt';
        if (is_file($file) && is_readable($file)) {
            $lines = file($file);
        }
    }

    if ($lines === false) {
        die("Unknown thread.");
    }

    foreach ($lines as $line) {
        echo $escape($line) . '<br/>';
    }
} elseif ($has_query) {
    echo '<hr>';
    echo '<ol>';
    foreach ($elasticaResultSet as $elasticaResult) {
        $data = $elasticaResult->getData();
        $date = date('Y-m-d,D', $data['postDateEpoch']);

        echo '<li>';
        // Link to the IRC log page of the day the question was posted.
        echo '<a href="http://irc.tiki.org/irclogger_log/tikiwiki?date=' . $date . '" style="text-decoration: none"><b>' . $date . '</b></a> &nbsp;';
        // Link to the full thread; the link text is the original IRC line(s)
        // of the thread whose timestamp equals the hit's postDateEpoch.
        echo '<a href="/?thread=' . $escape(urlencode((string) $data['thread'])) . '" style="text-decoration: none">';

        $threadFile  = '/home/tikiqa/augmented/' . $data['thread'] . '.txt';
        $threadLines = is_readable($threadFile) ? file($threadFile) : false;
        if ($threadLines !== false) {
            foreach ($threadLines as $line) {
                // Lines are matched as "<epoch> <nick>: <message>".
                $match = preg_match('/^([0-9]+) [^:]+\: (.*)$/', $line, $matches);
                // Loose comparison on purpose: the indexed epoch may be an
                // int or a numeric string, while the regex capture is a string.
                if ($match && $matches[1] == $data['postDateEpoch']) {
                    echo $escape($matches[2]) . '<br/>';
                }
            }
        }

        echo '</a><br/>';
        echo '<small>' . $escape($data['question']) . '</small>';
        echo '</li>';
        echo '<br/>';
    }
?>
</ol>
<?php
}
?>
</body>
</html>
<?php
// Closes the else-branch (the HTML page) of the `if($raw)` switch above.
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment