Skip to content

Instantly share code, notes, and snippets.

@naoa
Created February 2, 2015 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naoa/38b70672de64fcd1b6aa to your computer and use it in GitHub Desktop.
Save naoa/38b70672de64fcd1b6aa to your computer and use it in GitHub Desktop.
<?php
/**
 * Imports every <page> element found in the XML files of a directory into a
 * PostgreSQL table, printing the time taken by each INSERT and by the
 * CHECKPOINT issued after each file.
 *
 * Usage: php <this script> <db_name> <xml_directory>
 *
 * For each <page>, the values of <id>, <title> and <revision><text> are
 * inserted, in that order, as one row of the table named by $table.
 * The script stops at the first database error with a non-zero exit status.
 */

/**
 * Prints an error message, closes the database connection and terminates
 * the script with a non-zero exit status.
 *
 * @param resource|object $link    Open PostgreSQL connection.
 * @param string          $message Text to print before exiting.
 */
function abort_import($link, $message)
{
    echo $message;
    if (!pg_close($link)) {
        echo 'Database close error';
    }
    exit(1);
}

// Both command-line arguments are required; fail early with a usage hint
// instead of running with undefined values.
if (!isset($argv[1], $argv[2])) {
    echo "Usage: php " . basename(__FILE__) . " <db_name> <xml_directory>\n";
    exit(1);
}

$db = "localhost";
$db_name = $argv[1];
$table = "text";
$article = $argv[2];

// libpq connection string; the database name is single-quoted with quotes and
// backslashes escaped so that names containing spaces or quotes are passed
// through intact.
$conninfo = "host={$db} dbname='" . addcslashes($db_name, "'\\") . "' user=postgres";

// Values are sent as bound parameters, never spliced into the SQL text.
$insertSql = "INSERT INTO " . $table . " VALUES ($1, $2, $3);";

$handle = opendir($article);
if ($handle === false) {
    echo "Failed to open directory: " . $article . "\n";
    exit(1);
}

while (false !== ($file = readdir($handle))) {
    $path = $article . "/" . $file;

    // readdir() also returns "." and ".." (and any sub-directories); only
    // regular files can be XML inputs.
    if (!is_file($path)) {
        continue;
    }

    echo "-------$file------\n";
    echo "XML loading and SQL importing...\n";

    $xml = new XMLReader();
    if (!$xml->open($path)) {
        echo 'Failed to open file!';
        exit(1);
    }

    $link = pg_connect($conninfo);
    if (!$link) {
        echo 'Database connection error';
        exit(1);
    }

    $i = 0;
    while ($xml->read()) {
        // Only the opening <page> node carries the element's content; the
        // matching end-element node has the same name and must be ignored.
        if ($xml->nodeType !== XMLReader::ELEMENT || $xml->name !== "page") {
            continue;
        }

        $node = new SimpleXMLElement($xml->readOuterXML());
        $page = array(
            'id'    => (string) $node->id,
            'title' => (string) $node->title,
            'text'  => (string) $node->revision->text,
        );

        // Pages without an id are skipped, as before.
        if ($page['id'] === "") {
            continue;
        }

        $startTime = microtime(true);
        $result = pg_query_params(
            $link,
            $insertSql,
            array($page['id'], $page['title'], $page['text'])
        );
        if (!$result) {
            abort_import($link, "INSERT ERROR aborted" . "\n");
        }
        $elapsedTime = microtime(true) - $startTime;

        echo $page['id'] . "," . $elapsedTime . "\n";
        $i++;
    }

    // Issue a CHECKPOINT after each file and report how long it took.
    $startTime = microtime(true);
    $result = pg_query($link, "checkpoint;");
    if (!$result) {
        abort_import($link, "CHECKPOINT ERROR aborted" . "\n");
    }
    $elapsedTime = microtime(true) - $startTime;
    echo "checkpoint" . "," . $elapsedTime . "\n";

    $xml->close();
    echo $i . " records done.\n";

    if (!pg_close($link)) {
        echo 'Database close error';
        exit(1);
    }
}

closedir($handle);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment