Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Lists stats for major cities on Wykop.pl
<?php
// Download the stat.gov.pl ranking page (largest cities by population, per the URL)
// and derive one lower-case ASCII tag per city, remembering each city's position.
$data = file_get_contents('https://stat.gov.pl/statystyka-regionalna/rankingi-statystyczne/miasta-najwieksze-pod-wzgledem-liczby-ludnosci/');
if ($data === false) {
    // Without the ranking page there is nothing to report on; fail loudly instead of
    // silently printing two empty lists.
    throw new RuntimeException('Could not download the city ranking page from stat.gov.pl');
}

// Capture group 2 holds the city name; the optional "M.st." prefix and a trailing
// &nbsp; are matched outside of it so they do not end up in the tag.
$matches = [];
if (!preg_match_all('/<td style="text-align: left;">(M\.st\.){0,1}(.*?)(&nbsp;){0,1}<\/td>/is', $data, $matches)) {
    throw new RuntimeException('No city names found on the ranking page; its markup may have changed');
}

$tags = [];      // tag names in page order
$cityRank = [];  // tag name => 1-based position of the city on the page
$i = 0;
foreach ($matches[2] as $city) {
    // Count every table row so the rank reflects the page position even if a row is skipped below.
    ++$i;

    // Strip spaces and hyphens, then transliterate diacritics to plain ASCII.
    // NOTE(review): //TRANSLIT output is system- and locale-dependent — confirm the
    // generated tags on the machine that runs this script.
    $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', str_replace([' ', '-'], '', $city));
    if ($ascii === false) {
        // iconv could not convert the name; an empty tag would be meaningless, so skip it.
        continue;
    }

    $cityname = strtolower($ascii);
    $cityRank[$cityname] = $i;
    $tags[] = $cityname;
}

// Per-tag trend slopes, filled in by the reporting loop: tag => ['t_<len>' => slope].
$trendResults = [];
// Window lengths (number of trailing activity data points) for which a trend is computed.
$trends = [6, 12];
/**
 * Fits a least-squares line y = slope * x + intercept through the given points.
 *
 * Points are paired by position; $y is expected to hold as many values as $x.
 * Values may be ints, floats or numeric strings.
 *
 * Degenerate inputs no longer divide by zero: with no points both results are 0,
 * and when the slope is undefined (a single point, or all x values equal) the
 * slope is reported as 0 and the intercept as the mean of $y.
 *
 * @param array $x X coordinates.
 * @param array $y Y coordinates.
 *
 * @return array{slope: int|float, intercept: int|float}
 */
function linear_regression(array $x, array $y): array
{
    // Re-index so positional access works even if the caller passes sparse or string keys.
    $x = array_values($x);
    $y = array_values($y);

    $n = count($x);
    if ($n === 0) {
        return [
            'slope' => 0,
            'intercept' => 0,
        ];
    }

    $x_sum = array_sum($x);
    $y_sum = array_sum($y);
    $xx_sum = 0;
    $xy_sum = 0;
    for ($i = 0; $i < $n; $i++) {
        $xy_sum += ($x[$i] * $y[$i]);
        $xx_sum += ($x[$i] * $x[$i]);
    }

    // The denominator is n^2 times the variance of x; it is zero exactly when x does not vary,
    // in which case no slope can be determined and a flat line through the mean is returned.
    $denominator = ($n * $xx_sum) - ($x_sum * $x_sum);
    $slope = $denominator == 0
        ? 0
        : (($n * $xy_sum) - ($x_sum * $y_sum)) / $denominator;
    $intercept = ($y_sum - ($slope * $x_sum)) / $n;

    return [
        'slope' => $slope,
        'intercept' => $intercept,
    ];
}
/**
 * Scrapes follower and activity figures for a single Wykop tag.
 *
 * Two pages are fetched: the tag-suggestion endpoint (for the number shown before
 * "obs" next to the tag) and the tag page itself (for the list of per-period counts).
 * The fetch is best-effort: if a page cannot be downloaded or does not contain the
 * expected markup, the corresponding figures are reported as 0 / an empty list.
 *
 * @param string $tag Tag name without the leading '#'.
 *
 * @return array{0: int, 1: int, 2: int[]} Follower count, sum of all activity counts,
 *                                         and the individual activity counts in page order.
 */
function getTagData($tag)
{
    // --- follower count from the suggestion endpoint ---
    $obsrv = 0;
    $data = file_get_contents('https://www.wykop.pl/ajax/suggest/?search_text=%23' . $tag);
    if ($data !== false) {
        $decoded = json_decode($data, true);
        $html = (is_array($decoded) && isset($decoded['html']) && is_string($decoded['html']))
            ? $decoded['html']
            : '';

        // The tag is quoted so that any regex metacharacter in it is matched literally.
        $matches = [];
        $pattern = '/<li data-content="#' . preg_quote($tag, '/') . '".*?cbd">([0-9]+?) obs/is';
        if (preg_match($pattern, $html, $matches) === 1) {
            $obsrv = intval($matches[1]);
        }
    }

    // --- activity counts from the tag page ---
    $actvElems = [];
    $data = file_get_contents('https://www.wykop.pl/tag/' . $tag);
    if ($data !== false) {
        // Each list item with an id like "<4 digits>month<1-2 digits>" contributes one count.
        $matches = [];
        if (preg_match_all('/li id="[0-9]{4}month[0-9]{1,2}.*?<a href.*?<span>([0-9]+)/is', $data, $matches) > 0) {
            $actvElems = array_map('intval', $matches[1]);
        }
    }
    $actv = array_sum($actvElems);

    return [ $obsrv, $actv, $actvElems ];
}
// Cities that are additionally counted under a second, shorter tag:
// generated tag => extra tag whose figures are added to it.
$specialTags = [
    'gorzowwielkopolski' => 'gorzow',
    'bielskobiala' => 'bielsko',
];

$byActv = [];   // tag => total activity
$byObsrv = [];  // tag => follower count

foreach ($tags as $tag) {
    list($obsrv, $actv, $actvElems) = getTagData($tag);

    if (isset($specialTags[$tag])) {
        list($specialObsrv, $specialActv, $specialActvElems) = getTagData($specialTags[$tag]);
        $obsrv += $specialObsrv;
        $actv += $specialActv;
        // Add the two activity series element by element. array_map pads the shorter
        // series with null at its end, which array_sum treats as 0.
        // NOTE(review): if the two series differ in length they are aligned at the
        // start, not at the most recent entry — confirm this is intended.
        $actvElems = array_map(function (...$arrays) {
            return array_sum($arrays);
        }, $actvElems, $specialActvElems);
    }

    $byActv[$tag] = intval($actv);
    $byObsrv[$tag] = intval($obsrv);

    foreach ($trends as $trendLen) {
        // Take the trailing $trendLen entries; array_slice re-indexes them to 0..k-1,
        // so the keys serve as evenly spaced x coordinates.
        $last = array_slice($actvElems, $trendLen * -1, $trendLen);

        if (count($last) < 2) {
            // A slope needs at least two points; with fewer the regression would divide by zero.
            $trend = 0;
        } else {
            $linear = linear_regression(array_keys($last), $last);
            $trend = intval($linear['slope']);
        }

        $trendResults[$tag]['t_' . $trendLen] = $trend;
    }
}

// Highest values first, keeping the tag => value association.
arsort($byActv);
arsort($byObsrv);

// Ranking by follower count, with the city's position in the population ranking for comparison.
echo 'Lista miast wg liczby obserwujących:' . PHP_EOL;
$i = 0;
foreach ($byObsrv as $city => $obsrv) {
    echo ++$i . '. #' . $city . ': ' . $obsrv . ' (Wg mieszkańców: ' . $cityRank[$city] . ')' . PHP_EOL;
}

// Ranking by total tag activity, with the trend slopes for each configured window.
echo PHP_EOL . 'Lista miast wg aktywności tagu:' . PHP_EOL;
$i = 0;
foreach ($byActv as $city => $actv) {
    echo ++$i . '. ' . $city . ': ' . $actv . ' ('. join('; ', $trendResults[$city]) . ')' . ' (Wg mieszkańców: ' . $cityRank[$city] . ')' . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.