Skip to content

Instantly share code, notes, and snippets.

@naoa
Last active December 30, 2015 12:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naoa/7828167 to your computer and use it in GitHub Desktop.
Save naoa/7828167 to your computer and use it in GitHub Desktop.
This script extracts Japanese category titles from a Wikipedia database. Usage: % php category_jpfw5.php <database> <output_file>
<?php
/**
 * Export Japanese category titles from the `category` table to a text file.
 *
 * Usage: php <this_script> <database> <output_file>
 *
 * A cat_title is appended to <output_file> (one title per line) when it
 * consists only of hiragana, katakana, the long-vowel mark and kanji, and is
 * at least five characters long.
 *
 * The mysqli API is used because the original mysql_* extension no longer
 * exists in PHP 7 and later. Errors are reported on STDERR and terminate the
 * script with exit status 1.
 */

/**
 * Print an error message to STDERR and terminate with a non-zero status.
 *
 * @param string $message Text to report.
 */
function abort_export($message)
{
    fwrite(STDERR, $message . "\n");
    exit(1);
}

// Fail early with a usage hint instead of running with undefined arguments.
if (!isset($argv[1], $argv[2])) {
    abort_export('Usage: php ' . basename(__FILE__) . ' <database> <output_file>');
}

$db = "127.0.0.1";
$db_name = $argv[1];
$table = "category";
$username = "mysql";
$password = "";
$output_file = $argv[2];
// Minimum title length, counted in characters (not bytes).
$min_length = 5;

// mysqli throws exceptions by default since PHP 8.1; switch that off so the
// return-value checks below behave the same on every PHP version.
mysqli_report(MYSQLI_REPORT_OFF);

$con = mysqli_connect($db, $username, $password);
if (!$con) {
    abort_export('Database connection error');
}
if (!mysqli_select_db($con, $db_name)) {
    mysqli_close($con);
    abort_export('Database select error');
}

// Append mode: titles are added to whatever the file already contains.
$fw = fopen($output_file, 'a');
if ($fw === false) {
    mysqli_close($con);
    abort_export('Output file open error');
}

$result = mysqli_query($con, "SELECT cat_title FROM $table");
if (!$result) {
    fclose($fw);
    mysqli_close($con);
    abort_export('SELECT ERROR aborted');
}

while ($item = mysqli_fetch_assoc($result)) {
    $title = $item['cat_title'];
    // Keep only titles written entirely in hiragana, katakana, "ー" and kanji.
    if (preg_match("/^[ぁ-んァ-ヶー一-龠]+$/u", $title) !== 1) {
        continue;
    }
    // Pass the encoding explicitly so the length is counted in UTF-8
    // characters regardless of the configured internal encoding.
    if (mb_strlen($title, 'UTF-8') >= $min_length) {
        fwrite($fw, $title . "\n");
    }
}
mysqli_free_result($result);

// Close the output file first so it is flushed even if the DB close fails.
fclose($fw);
if (!mysqli_close($con)) {
    abort_export('Database close error');
}
echo "Done.\n";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment