Last active
February 18, 2016 12:22
-
-
Save otajisan/2b5f82bd778e04e0fc23 to your computer and use it in GitHub Desktop.
PHPでElasticSearch x kuromojiの形態素解析を行うサンプル
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Small client that sends text to the Elasticsearch `_analyze` endpoint
 * and returns the raw HTTP response.
 */
class ElasticSearchService
{
    /** Host and port of the Elasticsearch node to connect to. */
    const BASE_URL = 'localhost';
    const PORT = '9200';

    /**
     * Builds the cURL options for a POST request.
     *
     * @param string $url  Target URL.
     * @param mixed  $body Value handed to CURLOPT_POSTFIELDS as the request body.
     *
     * @return array cURL options keyed by CURLOPT_* constants.
     */
    protected function create_options($url, $body)
    {
        return array(
            CURLOPT_URL => $url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_RETURNTRANSFER => true,
            // Response headers are included in the output; request() splits them off.
            CURLOPT_HEADER => true,
        );
    }

    /**
     * Builds the URL of the `_analyze` endpoint.
     *
     * @return string Host, port, path and query string (no scheme prefix).
     */
    protected function create_request_url()
    {
        $query = http_build_query(
            array(
                'analyzer' => 'kuromoji', // use the kuromoji plugin for morphological analysis
                // Passed as the string 'true': a PHP boolean would be serialised as
                // "1", which strict boolean parsing on the server side rejects.
                'pretty' => 'true',
            ),
            '',
            // Explicit separator so the URL does not depend on the
            // arg_separator.output ini setting (which may be "&amp;").
            '&'
        );

        return self::BASE_URL.':'.self::PORT.'/_analyze?'.$query;
    }

    /**
     * Executes an HTTP request with the given cURL options.
     *
     * @param array $options cURL options as produced by create_options().
     *
     * @return array Array with two keys: 'Header' (raw response headers)
     *               and 'Result' (response body).
     *
     * @throws RuntimeException When cURL cannot be initialised or the transfer fails.
     */
    protected function request($options)
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Failed to initialise cURL');
        }

        curl_setopt_array($ch, $options);
        $response = curl_exec($ch);

        if ($response === false) {
            // Read the error details before the handle is released.
            $message = curl_error($ch);
            $code = curl_errno($ch);
            curl_close($ch);

            throw new RuntimeException('Elasticsearch request failed: '.$message, $code);
        }

        // CURLOPT_HEADER prepends the headers to the body; the reported
        // header size tells us where to split the two apart.
        $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        curl_close($ch);

        return array(
            'Header' => substr($response, 0, $header_size),
            'Result' => substr($response, $header_size),
        );
    }

    /**
     * Runs morphological analysis on the given text and returns the response.
     *
     * @param string $text Text to analyze; sent as the request body.
     *
     * @return array Array with 'Header' and 'Result' keys, see request().
     *
     * @throws RuntimeException When the HTTP request cannot be completed.
     */
    public function analyze_word($text)
    {
        return $this->request($this->create_options($this->create_request_url(), $text));
    }
}
// Validate arguments: the text to analyze must be given as the first argument.
if ($argc < 2) {
    // Report the usage error on STDERR with a non-zero exit status so that
    // shells and calling scripts can detect the failure.
    fwrite(STDERR, '引数に解析対象の文字列を指定してください'.PHP_EOL);
    exit(1);
}

$text = $argv[1];
$service = new ElasticSearchService();

// Run the analysis and print the result.
print_r($service->analyze_word($text));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment