Skip to content

Instantly share code, notes, and snippets.

@cherenkov
Created October 12, 2012 15:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cherenkov/3879812 to your computer and use it in GitHub Desktop.
Save cherenkov/3879812 to your computer and use it in GitHub Desktop.
<?php
// Hatena Question (title truncated): "I am studying how to write a parser in PHP 5.3. http://ja.wikipedia.."
//http://q.hatena.ne.jp/1350045786
/**
 * Debug helper: print a human-readable dump of a value inside a <pre> element.
 *
 * The dump is produced by print_r() and written straight to the output;
 * nothing is returned.
 *
 * @param mixed $var Value to dump.
 */
function pr($var) {
    // Ask print_r() for the dump as a string so the whole element goes out in one echo.
    echo '<pre>', print_r($var, true), '</pre>';
}
// Declare the response as UTF-8 HTML; this runs before any of the echo statements below.
header('Content-Type:text/html; charset=UTF-8');
// Load the Simple HTML DOM library from the local ./simplehtmldom directory
// (presumably the source of file_get_html(), which is not defined in this file).
require_once("./simplehtmldom/simple_html_dom.php");
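/*
Earlier version of getWikiSummary(), disabled and kept for reference.
It only collected paragraphs whose HTML matched the keyword (with any "_(...)" suffix
removed) and dumped each matching paragraph via pr().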
function getWikiSummary($keyword) {
$result = '';
$url = 'http://ja.wikipedia.org/wiki/' . urlencode($keyword);
$url = preg_replace('/%28/u', '(', $url); //括弧はこの処理をしないとダメみたい。
$url = preg_replace('/%29/u', ')', $url);
$html = file_get_html($url);
$ps = $html->find('#mw-content-text > p');
for ($i = 0, $n = count($ps); $i < $n; $i++) {
if (preg_match('/' . preg_replace('/_\(.+?\)/u' , '', $keyword) . '/u', $ps[$i]->innertext)) {
pr($ps[$i]->innertext);
$result .= $ps[$i]->innertext;
if ($ps[$i]->nextSibling()->tag === 'table') {
break;
}
}
}
$html->clear();
return $result;
}
*/
/**
 * Fetch the leading paragraphs of a Japanese Wikipedia article as one HTML string.
 *
 * The inner HTML of every <p> that is a direct child of #mw-content-text is
 * concatenated in document order. Collection stops after the first paragraph
 * whose next sibling element is a <table>.
 *
 * @param string $keyword Article title as it appears in the URL path,
 *                        e.g. 'カナリア_(お笑い)'.
 * @return string Concatenated paragraph HTML, or '' when the page could not be
 *                loaded or contains no matching paragraph.
 */
function getWikiSummary($keyword) {
    $result = '';

    // rawurlencode() is the encoder intended for URL path segments: urlencode()
    // turns a space into '+', which is a literal plus sign inside a path.
    // Parentheses are restored afterwards; the original author noted that the
    // request did not seem to work with percent-encoded parentheses.
    $encodedTitle = str_replace(
        array('%28', '%29'),
        array('(', ')'),
        rawurlencode($keyword)
    );
    $url = 'http://ja.wikipedia.org/wiki/' . $encodedTitle;

    $html = file_get_html($url);
    if (!$html) {
        // NOTE(review): file_get_html() is expected to return false rather than a
        // DOM object when the page cannot be loaded; bail out instead of calling
        // a method on a non-object.
        return $result;
    }

    foreach ($html->find('#mw-content-text > p') as $paragraph) {
        $result .= $paragraph->innertext;

        // The last child has no next sibling, so check before reading ->tag.
        $next = $paragraph->nextSibling();
        if ($next && $next->tag === 'table') {
            break;
        }
    }

    // Release the parsed DOM before returning.
    $html->clear();

    return $result;
}
// Titles of the Japanese Wikipedia articles to summarise.
$keywords = array('東京太・ゆめ子', 'カナリア_(お笑い)');

// Render a definition list: the article title as <dt>, its fetched summary as <dd>.
echo '<dl>';
foreach ($keywords as $keyword) {
    // Fetch first so anything emitted during the fetch precedes this entry's markup.
    $summary = getWikiSummary($keyword);
    echo '<dt>', $keyword, '</dt>', '<dd>', $summary, '</dd>';
}
echo '</dl>';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment