Skip to content

Instantly share code, notes, and snippets.

@book000

book000/README.md

Last active Jan 18, 2021
Embed
What would you like to do?
きららファンタジアニュースクローリング

きららファンタジアニュースクローリング

収集元

アプリ内ニュースの本文は、改行タグの表記に揺れがある(<br>, <br/>, </br>, <br />)。また、<p>&nbsp;</p>で改行しているケースもある。

2021/01/19 追記

アプリ内ニュースは次のアドレスに変更された様子。RSS(XML)フィードはなさそうなので、普通にスクレイピングするしかなさそうかな?

とはいえ、私自身はすでにきららファンタジアをほとんどやっていないので、ここで実装する予定はありません。

<?php
/**
 * Posts a text message to the configured Discord webhook.
 *
 * The message is wrapped as {"content": <message>} and sent as a JSON POST
 * body. The webhook response is not inspected and nothing is returned.
 *
 * @param string $message Text to send as the message content.
 * @return void
 */
function sendDiscordWebhook($message)
{
    $url = "<DISCORD WEBHOOK URL>";

    // Encode exactly once so that the Content-Length header and the request
    // body are guaranteed to describe the same bytes.
    $payload = json_encode(["content" => $message]);
    if ($payload === false) {
        // json_encode() returns false on e.g. malformed UTF-8; posting that
        // would send an empty body, so report the problem and skip the request.
        trigger_error(
            "sendDiscordWebhook: could not encode message: " . json_last_error_msg(),
            E_USER_WARNING
        );
        return;
    }

    $header = [
        "Content-Type: application/json",
        "Content-Length: " . strlen($payload),
        "User-Agent: DiscordBot (http://example.com, v0.0.1)",
    ];
    $context = stream_context_create([
        "http" => [
            "method" => "POST",
            "header" => implode("\r\n", $header),
            "content" => $payload,
        ],
    ]);

    file_get_contents($url, false, $context);
}
/**
 * Downloads one news article page and extracts the inner HTML of its
 * <div class="newsText"> element.
 *
 * @param string $link Article URL taken from the feed.
 * @return string|null The captured HTML, or null when the download failed or
 *                     the element was not found in the response.
 */
function fetchNewsBody($link)
{
    $html = file_get_contents($link);
    if ($html === false) {
        return null;
    }

    // The body is used exactly as received, so gunzip it when the response
    // declares "Content-Encoding: gzip". Header names are matched
    // case-insensitively, and decoding happens at most once even if several
    // header sets (e.g. from redirects) are present.
    $responseHeaders = isset($http_response_header) ? $http_response_header : [];
    foreach ($responseHeaders as $line) {
        $parts = explode(":", $line, 2);
        if (count($parts) === 2
            && strcasecmp(trim($parts[0]), "Content-Encoding") === 0
            && strcasecmp(trim($parts[1]), "gzip") === 0
        ) {
            $decoded = gzdecode($html);
            if ($decoded !== false) {
                $html = $decoded;
            }
            break;
        }
    }

    if (preg_match("/<div class=\"newsText\">([\s\S]+)<\/div>/", $html, $m) !== 1) {
        return null;
    }
    return $m[1];
}

/**
 * Converts the article HTML fragment into Discord-flavoured plain text.
 *
 * <strong>, <del> and <h1> become Markdown, the various <br> spellings,
 * &nbsp; and paragraph boundaries become newlines, and every remaining tag
 * is stripped.
 *
 * @param string $html HTML fragment captured from the article page.
 * @return string Trimmed text ready to be posted.
 */
function newsHtmlToDiscordText($html)
{
    $text = preg_replace("/<strong>(.+?)<\/strong>/", "**$1**", $html);
    // Accept <del> both with and without attributes.
    $text = preg_replace('/<del\b[^>]*>(.+?)<\/del>/', " ~~$1~~ ", $text);
    $text = preg_replace("/<h1>(.+?)<\/h1>/", "__**$1**__\n", $text);
    $text = str_replace(["<br>", "<br/>", "</br>", "<br />"], "\n", $text);
    $text = str_replace("&nbsp;", "\n\n", $text);
    $text = str_replace("</p><p>", "\n", $text);
    return trim(strip_tags($text));
}

// Load the map of already-notified articles (link => title).
$readedFile = __DIR__ . "/kirara_app_readed.json";
$readed = [];
if (file_exists($readedFile)) {
    $readed = json_decode(file_get_contents($readedFile), true);
    if (!is_array($readed)) {
        // Unreadable or corrupt state file: start over with an empty map.
        $readed = [];
    }
}

$xml = simplexml_load_file("https://krr-prd-web.star-api.com/news/feed/");
if ($xml === false) {
    // Without a feed there is nothing to do; the state file is left untouched.
    trigger_error("Failed to load the news feed", E_USER_WARNING);
    exit(1);
}

// Note: SimpleXML iteration keys are element names rather than indexes, so
// the former `$key == 1` skip could never fire and has been dropped.
foreach ($xml->channel->item as $item) {
    $title = (string)$item->title;
    $link = (string)$item->link;
    $pubDate = (string)$item->pubDate;

    $body = fetchNewsBody($link);
    if ($body === null) {
        // Retry once after a short pause.
        sleep(1);
        $body = fetchNewsBody($link);
        if ($body === null) {
            // Fetch failed twice; skip this article.
            continue;
        }
    }

    $text = newsHtmlToDiscordText($body);
    echo $text;

    // Notify for articles never seen before and for known links whose title
    // has changed since the last run.
    $known = isset($readed[$link]);
    if (!$known || $readed[$link] !== $title) {
        $emoji = $known ? ":arrows_counterclockwise:" : ":new:";
        sendDiscordWebhook($emoji . "__**" . $title . "**__ (" . date("Y/m/d H:i:s", strtotime($pubDate)) . ")\n\n" . mb_strimwidth($text, 0, 1950, "...") . "\n\n" . $link);
        $readed[$link] = $title;
    }

    // Pause between articles.
    sleep(1);
}

file_put_contents($readedFile, json_encode($readed));
<?php
/**
 * Posts a text message to the configured Discord webhook.
 *
 * The message is wrapped as {"content": <message>} and sent as a JSON POST
 * body. The webhook response is not inspected and nothing is returned.
 *
 * @param string $message Text to send as the message content.
 * @return void
 */
function sendDiscordWebhook($message)
{
    $url = "<DISCORD WEBHOOK URL>";

    // Encode exactly once so that the Content-Length header and the request
    // body are guaranteed to describe the same bytes.
    $payload = json_encode(["content" => $message]);
    if ($payload === false) {
        // json_encode() returns false on e.g. malformed UTF-8; posting that
        // would send an empty body, so report the problem and skip the request.
        trigger_error(
            "sendDiscordWebhook: could not encode message: " . json_last_error_msg(),
            E_USER_WARNING
        );
        return;
    }

    $header = [
        "Content-Type: application/json",
        "Content-Length: " . strlen($payload),
        "User-Agent: DiscordBot (http://example.com, v0.0.1)",
    ];
    $context = stream_context_create([
        "http" => [
            "method" => "POST",
            "header" => implode("\r\n", $header),
            "content" => $payload,
        ],
    ]);

    file_get_contents($url, false, $context);
}
// Load the map of already-notified web posts; fall back to an empty map when
// the state file is missing, unreadable or does not contain a JSON array/object.
$readedFile = __DIR__ . "/kirara_web_readed.json";
$readed = [];
if (file_exists($readedFile)) {
    $readed = json_decode(file_get_contents($readedFile), true);
    if (!is_array($readed)) {
        $readed = [];
    }
}

// Fetch the post list from the site's WordPress REST endpoint and decode it
// into arrays. A failed download or invalid JSON yields an empty list.
$json = file_get_contents("https://kirarafantasia.com/wp-json/wp/v2/posts");
$json = $json === false ? null : json_decode($json, true);
if (!is_array($json)) {
    $json = [];
}
// Author's note: got tired of this, so nothing is done with the posts yet.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment