Skip to content

Instantly share code, notes, and snippets.

@realFlowControl
Created March 6, 2024 11:36
Show Gist options
  • Save realFlowControl/470253dc4e765837103687082e541522 to your computer and use it in GitHub Desktop.
Save realFlowControl/470253dc4e765837103687082e541522 to your computer and use it in GitHub Desktop.
1brc in PHP
<?php
/**
* Get the chunks that each thread needs to process with start and end position.
* These positions are aligned to \n chars because we use `fgets()` to read
* which itself reads till a \n character.
*
* @return array<int, array{0: int, 1: int}>
*/
function get_file_chunks(string $file, int $cpu_count): array {
$size = filesize($file);
if ($cpu_count == 1) {
$chunk_size = $size;
} else {
$chunk_size = (int) ($size / $cpu_count);
}
$fp = fopen($file, 'rb');
$chunks = [];
$chunk_start = 0;
while ($chunk_start < $size) {
$chunk_end = min($size, $chunk_start + $chunk_size);
if ($chunk_end < $size) {
while (fread($fp, 1) !== "\n") {
$chunk_end ++;
fseek($fp, $chunk_end);
}
}
$chunks[] = [
$chunk_start,
$chunk_end
];
$chunk_start = $chunk_end+1;
}
fclose($fp);
return $chunks;
}
$process_chunk = function (string $file, int $chunk_start, int $chunk_end) {
$stations = [];
$fp = fopen($file, 'rb');
fseek($fp, $chunk_start);
while ($data = fgets($fp)) {
$chunk_start += strlen($data);
if ($chunk_start > $chunk_end) {
break;
}
$pos2 = strpos($data, ';');
$city = substr($data, 0, $pos2);
$temp = (float)substr($data, $pos2+1, -1);
if (isset($stations[$city])) {
$station = &$stations[$city];
$station[3] ++;
$station[2] += $temp;
if ($temp < $station[0]) {
$station[0] = $temp;
} elseif ($temp > $station[1]) {
$station[1] = $temp;
}
} else {
$stations[$city] = [
$temp,
$temp,
$temp,
1
];
}
}
return $stations;
};
$file = 'measurements.txt';
$threads_cnt = 16;
$chunks = get_file_chunks($file, $threads_cnt);
$futures = [];
for ($i = 0; $i < $threads_cnt; $i++) {
$runtime = new \parallel\Runtime();
$futures[$i] = $runtime->run(
$process_chunk,
[
$file,
$chunks[$i][0],
$chunks[$i][1]
]
);
}
$results = [];
for ($i = 0; $i < $threads_cnt; $i++) {
// `value()` blocks until a result is available, so the main thread waits
// for the threads to finish
$chunk_result = $futures[$i]->value();
foreach ($chunk_result as $city => $measurement) {
if (isset($results[$city])) {
$result = &$results[$city];
$result[2] += $measurement[2];
$result[3] += $measurement[3];
if ($measurement[0] < $result[0]) {
$result[0] = $measurement[0];
}
if ($measurement[1] < $result[1]) {
$result[1] = $measurement[1];
}
} else {
$results[$city] = $measurement;
}
}
}
ksort($results);
echo '{', PHP_EOL;
foreach($results as $k=>&$station) {
echo "\t", $k, '=', $station[0], '/', ($station[2]/$station[3]), '/', $station[1], ',', PHP_EOL;
}
echo '}', PHP_EOL;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment