Skip to content

Instantly share code, notes, and snippets.

@hplc
Last active February 6, 2020 12:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hplc/af40473308ab4bf1b55e60088ca33f64 to your computer and use it in GitHub Desktop.
Save hplc/af40473308ab4bf1b55e60088ca33f64 to your computer and use it in GitHub Desktop.
Get the names of coronavirus-infected communities in Shenzhen and convert them to JSON.
<?php
/**
 * Download a bendibao.com article listing Shenzhen communities with
 * coronavirus cases, cut out the listing between two known markers, and
 * print it as JSON.
 *
 * Output: a JSON object mapping each district name to the list of tokens
 * (community names) that follow that district heading in the article text.
 *
 * Any failure (download, missing marker, regex or JSON error) raises a
 * RuntimeException instead of silently producing empty or garbage output.
 */

/**
 * Tell whether a token is a district heading rather than a community name.
 *
 * A heading ends in "区" and is either exactly three characters long or
 * ends in "新区".
 *
 * @param string $token Trimmed, non-empty UTF-8 token.
 * @return bool
 */
function isDistrictName($token)
{
    if (preg_match('/区$/u', $token) !== 1) {
        return false;
    }

    return mb_strlen($token, 'UTF-8') === 3 || preg_match('/新区$/u', $token) === 1;
}

/**
 * Split plain text into tokens and group them under the most recent
 * district heading.
 *
 * @param string $text Tag-free UTF-8 text of the listing.
 * @return array Map of district name => list of tokens following it.
 * @throws RuntimeException When the text cannot be tokenised (e.g. invalid UTF-8).
 */
function groupByDistrict($text)
{
    // Split on any run of whitespace, including NBSP (U+00A0) and the
    // ideographic space (U+3000), and drop empty tokens up front.
    $tokens = preg_split('/[\s\x{00A0}\x{3000}]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        throw new RuntimeException('Unable to tokenise listing text (PCRE error ' . preg_last_error() . ')');
    }

    $grouped = array();
    $district = null;

    foreach ($tokens as $token) {
        if (isDistrictName($token)) {
            $district = $token;
            $grouped[$district] = array();
            continue;
        }

        if ($district === null) {
            // Token appears before any district heading: there is no
            // bucket to file it under, so skip it rather than creating
            // an entry with an empty key.
            continue;
        }

        $grouped[$district][] = $token;
    }

    return $grouped;
}

$url = 'http://sz.bendibao.com/news/202023/827759.htm';
$startMarker = '<strong>福田区</strong>';
$endMarker = '确诊病例曾到过我的小区,怎么办?';

$page = file_get_contents($url);
if ($page === false) {
    throw new RuntimeException("Unable to download $url");
}

// Locate the listing. strpos() returns false when a marker is missing,
// which must not be fed to substr() as if it were offset/length 0.
$start = strpos($page, $startMarker);
if ($start === false) {
    throw new RuntimeException('Start marker not found in page');
}

$end = strpos($page, $endMarker, $start);
if ($end === false) {
    throw new RuntimeException('End marker not found in page');
}

$page = substr($page, $start, $end - $start);

// Drop non-breaking-space entities entirely.
$page = str_replace('&nbsp;', '', $page);

// Remove this exact <section> opening tag by hand before strip_tags():
// its attribute quoting is unbalanced, which can keep strip_tags() from
// recognising where the tag ends.
$page = str_replace('<section powered-by="xiumi.us" font-size:15px;letter-spacing:0.544px;background-color:#ffffff;text-align:center;word-wrap:break-word="" !important;"="" style="margin: 10px 0px; padding: 0px; max-width: 100%; box-sizing: border-box; color: rgb(51, 51, 51);">', '', $page);

$text = strip_tags($page);

$b = groupByDistrict($text);

// Cast to object so an empty result is still encoded as a JSON object
// ("{}"), matching the district => communities shape of non-empty output.
$json = json_encode((object) $b);
if ($json === false) {
    throw new RuntimeException('Unable to encode result as JSON: ' . json_last_error_msg());
}

echo $json;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment