Skip to content

Instantly share code, notes, and snippets.

@naoa
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naoa/7e6c5a5eee8ec993c4f9 to your computer and use it in GitHub Desktop.
Save naoa/7e6c5a5eee8ec993c4f9 to your computer and use it in GitHub Desktop.
<?php
/**
 * Groonga load/select latency benchmark.
 *
 * Streams <page> elements from the XML file named by the first command-line
 * argument. Each page with a non-empty <id> is loaded into the Groonga "text"
 * table over HTTP, then a select on the "title" column is polled until the
 * record is reported as a hit. One timing line is printed per record:
 *
 *     <record no>, <timestamp>, <load secs>, <search secs>, <total secs>
 *
 * Processing stops after 1000 records or at the end of the file.
 *
 * Usage: php <script> <pages.xml>
 */

/**
 * Prints a message to STDERR and terminates with a failure exit status.
 *
 * @param string $message Text to report.
 */
function failWith($message)
{
    fwrite(STDERR, $message . "\n");
    exit(1);
}

/**
 * Builds a cURL handle for a request against the local Groonga HTTP server.
 *
 * @param string      $method HTTP method to send ("POST" or "GET").
 * @param string      $url    Full request URL.
 * @param string|null $body   Request body, or null to send none.
 *
 * @return resource|CurlHandle Configured handle; the caller executes and closes it.
 */
function openGroongaRequest($method, $url, $body = null)
{
    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_PORT, 10041);
    curl_setopt($handle, CURLOPT_CUSTOMREQUEST, $method);
    if ($body !== null) {
        curl_setopt($handle, CURLOPT_POSTFIELDS, $body);
    }
    // Return the response as a string instead of printing it, without headers.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_HEADER, 0);

    return $handle;
}

if (!isset($argv[1])) {
    failWith('Usage: php <script> <pages.xml>');
}
$article = $argv[1];

$xml = new XMLReader();
if (!$xml->open($article)) {
    failWith('Failed to open file!');
}

$maxRecords       = 1000;   // stop after this many loaded records
$pollIntervalUsec = 10000;  // 10 msec between polls, matching the printed message
$pollTimeoutSec   = 60.0;   // give up waiting for a single record after this long

$loadUrl   = "http://localhost:10041/d/load?table=text";
$selectUrl = "http://localhost:10041/d/select?table=text&match_columns=title&limit=0&match_escalation_threshold=-1";

$rc = 1;
$startTimeAll = microtime(true);

while ($xml->read()) {
    // Only react to the opening <page> tag: the reader also reports the
    // closing tag under the same name, which must not be processed again.
    if ($xml->nodeType !== XMLReader::ELEMENT || $xml->name !== "page") {
        continue;
    }

    $node  = new SimpleXMLElement($xml->readOuterXML());
    $id    = (string)$node->id;
    $title = (string)$node->title;
    $text  = (string)$node->revision->text;
    if ($id === "") {
        continue;
    }

    // The id prefix is part of the stored title; the search below looks the
    // record up by this combined value.
    $titleKey = $id . "_" . $title;

    // --- load ---------------------------------------------------------------
    $update_startTime = microtime(true);
    $payload = json_encode(array(
        "_key"  => $id,
        "title" => $titleKey,
        "text"  => $text
    ));
    if ($payload === false) {
        // Nothing was loaded, so polling for this record could never succeed.
        fwrite(STDERR, "skipping page {$id}: could not encode as JSON\n");
        continue;
    }

    $ch = openGroongaRequest("POST", $loadUrl, $payload);
    $loadResponse = curl_exec($ch);
    $loadError = curl_error($ch);
    curl_close($ch);
    if ($loadResponse === false) {
        failWith("load request failed for page {$id}: {$loadError}");
    }
    $updateTime = microtime(true) - $update_startTime;

    // --- search -------------------------------------------------------------
    $search_startTime = microtime(true);
    $query = urlencode($titleKey);
    $ch = openGroongaRequest("GET", $selectUrl . "&query=\"{$query}\"");

    $deadline = $search_startTime + $pollTimeoutSec;
    while (true) {
        $data = curl_exec($ch);
        if ($data === false) {
            $searchError = curl_error($ch);
            curl_close($ch);
            failWith("select request failed for page {$id}: {$searchError}");
        }

        // The hit count is read from [1][0][0][0] of the decoded response;
        // anything that does not decode to that shape counts as "no hit yet".
        $json  = json_decode($data, true);
        $count = isset($json[1][0][0][0]) ? (int)$json[1][0][0][0] : 0;
        if ($count > 0) {
            break;
        }
        if (microtime(true) >= $deadline) {
            fwrite(STDERR, "page {$id} not searchable after {$pollTimeoutSec} sec, moving on\n");
            break;
        }

        echo "sleep 10msec\n";
        usleep($pollIntervalUsec);
    }
    curl_close($ch);

    $searchTime = microtime(true) - $search_startTime;
    $AllTime = microtime(true) - $update_startTime;
    $now = microtime(true);
    echo "{$rc}, {$now}, {$updateTime}, {$searchTime}, {$AllTime}\n";

    $rc++;
    if ($rc > $maxRecords) {
        break;
    }
}

$xml->close();
$endTimeAll = microtime(true);
$elapsedTimeAll = $endTimeAll - $startTimeAll;

// $rc is always one past the number of records actually processed.
$rc--;
echo $rc . " records done. Total = " . $elapsedTimeAll . "\n";
// The closing tag is kept on purpose: non-PHP text follows in this file.
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment