Skip to content

Instantly share code, notes, and snippets.

@divinity76
Created January 25, 2024 05:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save divinity76/5729472dd5d77e94cd0acb245aac2226 to your computer and use it in GitHub Desktop.
Save divinity76/5729472dd5d77e94cd0acb245aac2226 to your computer and use it in GitHub Desktop.
blake3 portable vs optimized
<?php
declare(strict_types=1);
/**
 * Better version of shell_exec(): feeds data to the command's stdin and captures
 * stdout, stderr and the OS-level exit code.
 *
 * The child's stdout and stderr are redirected into two temporary files which are
 * read back by this function; only stdin is connected through a pipe.
 *
 * @param string      $cmd       command to execute
 * @param string      $stdin     (optional) data to send to stdin, binary data is supported.
 * @param string|null $stdout    (optional, out) stdout data generated by cmd
 * @param string|null $stderr    (optional, out) stderr data generated by cmd
 * @param bool        $print_std (optional, default false) if you want stdout+stderr to be
 *                               printed while it's running, set this to true.
 *                               (useful for long-running commands)
 * @return int the exit code reported for the process
 * @throws RuntimeException if the capture files cannot be created or the process cannot be started
 */
function hhb_exec(string $cmd, string $stdin = "", ?string &$stdout = null, ?string &$stderr = null, bool $print_std = false): int
{
    $stdouth = tmpfile();
    $stderrh = tmpfile();
    try {
        if ($stdouth === false || $stderrh === false) {
            throw new RuntimeException("Failed to create temporary files for capturing stdout/stderr");
        }
        $descriptorspec = [
            0 => ["pipe", "rb"], // stdin
            1 => ["file", stream_get_meta_data($stdouth)['uri'], 'ab'], // stdout
            2 => ["file", stream_get_meta_data($stderrh)['uri'], 'ab'], // stderr
        ];
        $pipes = [];
        $proc = proc_open($cmd, $descriptorspec, $pipes);
        if ($proc === false) {
            throw new RuntimeException("Failed to start process: $cmd");
        }
        // fwrite() on a pipe may accept only part of the buffer, so keep writing the remainder.
        while ($stdin !== "") {
            $written_now = fwrite($pipes[0], $stdin);
            if ($written_now === false || $written_now < 1) {
                // nothing could be written (e.g. the child closed its stdin): stop feeding it
                break;
            }
            $stdin = substr($stdin, $written_now);
        }
        // closing stdin signals end-of-input to the child
        fclose($pipes[0]);
        unset($stdin, $pipes[0]);
        if (! $print_std) {
            $proc_ret = proc_close($proc); // this line will stall until the process has exited.
            $captured = stream_get_contents($stdouth);
            $stdout = is_string($captured) ? $captured : "";
            $captured = stream_get_contents($stderrh);
            $stderr = is_string($captured) ? $captured : "";
        } else {
            $stdout = "";
            $stderr = "";
            stream_set_blocking($stdouth, false);
            stream_set_blocking($stderrh, false);
            // Drains whatever the child has appended to the capture files since the last call,
            // echoes it to our own STDOUT/STDERR, and reports whether anything new arrived.
            $fetchstd = function () use (&$stdout, &$stderr, $stdouth, $stderrh): bool {
                $ret = false;
                $tmp = stream_get_contents($stdouth);
                if (is_string($tmp) && strlen($tmp) > 0) {
                    $ret = true;
                    $stdout .= $tmp;
                    fwrite(STDOUT, $tmp);
                }
                $tmp = stream_get_contents($stderrh);
                if (is_string($tmp) && strlen($tmp) > 0) {
                    $ret = true;
                    $stderr .= $tmp;
                    fwrite(STDERR, $tmp);
                }
                return $ret;
            };
            while (($status = proc_get_status($proc))["running"]) {
                if (! $fetchstd()) {
                    // nothing new: back off for 100 ms instead of busy-looping
                    usleep(100 * 1000);
                }
            }
            // take the exit code from the status snapshot that first reported "not running"
            $proc_ret = $status["exitcode"];
            proc_close($proc);
            // pick up anything written between the last poll and process exit
            $fetchstd();
        }
        return $proc_ret;
    } finally {
        // always release the capture files, even when an exception is propagating
        if (is_resource($stdouth)) {
            fclose($stdouth);
        }
        if (is_resource($stderrh)) {
            fclose($stderrh);
        }
    }
}
/**
 * Prints the command line, runs it with live output streaming, and insists on success.
 *
 * @param string      $cmd    command to execute
 * @param string|null $stdout (optional, out) stdout data generated by cmd
 * @param string|null $stderr (optional, out) stderr data generated by cmd
 * @throws Exception when the command exits with a non-zero code
 */
function exec2(string $cmd, ?string &$stdout = null, ?string &$stderr = null): void
{
    echo "$cmd\n";
    $ret = hhb_exec($cmd, "", $stdout, $stderr, true);
    if ($ret === 0) {
        return;
    }
    throw new Exception("Error: $ret: $cmd");
}
/**
 * Runs a (possibly multi-line) script through /bin/bash via exec2().
 *
 * @param string $cmds shell script text; it is passed to bash as one escaped -c argument
 */
function exec3(string $cmds): void
{
    exec2("/bin/bash -c " . escapeshellarg($cmds));
}
/**
 * Converts the time taken to process a fixed-size payload into a throughput in MB/s
 * (1 MB = 1024 KB here).
 *
 * @param int $timeInNanoseconds time taken to process the payload, in nanoseconds; must be > 0
 * @param int $sizeInKB          (optional, default 16) size of the processed payload in KB
 * @return float throughput in MB per second
 * @throws InvalidArgumentException if $timeInNanoseconds is not positive or $sizeInKB is negative
 */
function calculateThroughputMBPS(int $timeInNanoseconds, int $sizeInKB = 16): float
{
    if ($timeInNanoseconds <= 0) {
        // zero would divide by zero and a negative duration yields a meaningless negative rate
        throw new InvalidArgumentException("timeInNanoseconds must be positive, got $timeInNanoseconds");
    }
    if ($sizeInKB < 0) {
        throw new InvalidArgumentException("sizeInKB must not be negative, got $sizeInKB");
    }
    $sizeInMB = $sizeInKB / 1024.0; // Convert KB to MB (16KB = 0.015625MB)
    $timeInSeconds = $timeInNanoseconds / 1000000000.0; // Convert nanoseconds to seconds
    return $sizeInMB / $timeInSeconds; // MB/s
}
// Report everything, show it, and promote every reported PHP error/warning/notice
// to an ErrorException so that failures stop the script instead of being ignored.
error_reporting(E_ALL);
ini_set("display_errors", "1");
set_error_handler(function ($severity, $message, $file, $line) {
    // Respect the current error_reporting() mask: severities masked out are left alone.
    if ((error_reporting() & $severity) === 0) {
        return;
    }
    throw new ErrorException($message, 0, $severity, $file, $line);
});
// Fetch the upstream BLAKE3 C sources once; skipped when the directory already exists.
if (!is_dir("upstream_blake3")) {
    exec3(
<<<'CMD'
# abort on the first failing command: without this, a failed clone/cd would let the
# "rm -rf" lines below run inside the caller's working directory
set -euo pipefail
# fancy way of just fetching the "c" folder (the only thing we want)
git clone --branch '1.5.0' -n --depth=1 --filter=tree:0 'https://github.com/BLAKE3-team/BLAKE3.git' 'upstream_blake3'
cd upstream_blake3
git sparse-checkout set --no-cone c
git checkout
rm -rf .git
cd c
# some stuff we don't need
rm -rf blake3_c_rust_bindings test.py example.c main.c Makefile.testing CMakeLists.txt blake3-config.cmake.in README.md .gitignore
CMD
    );
}
// Number of hash runs each compiled test binary performs; the binary reports its best run.
$iterations = 999;
// EXT_HASH_BLAKE3_SOURCES="hash_blake3.c blake3/upstream_blake3/c/blake3.c blake3/upstream_blake3/c/blake3_dispatch.c blake3/upstream_blake3/c/blake3_portable.c"
// Build variants: test name => gcc command line producing ./test
$tests = [
    "O2-portable" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-portable-march" => "gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-sse2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-sse41" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-avx2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512",
    "O2-avx512" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S",
];
$results = [];
foreach ($tests as $test_name => $cmd) {
    // compile this variant, then run it and capture the number it prints
    exec2($cmd);
    $stdout = null;
    $stderr = null;
    exec2("./test $iterations", $stdout, $stderr);
    $stdout = trim((string) $stdout);
    $stdout_parsed = filter_var($stdout, FILTER_VALIDATE_INT);
    if ($stdout_parsed === false) {
        throw new Exception("Error: could not parse $stdout as an integer");
    }
    // The value is passed to calculateThroughputMBPS(), which takes nanoseconds.
    $nanoseconds_for_16_kib = $stdout_parsed;
    $mb_per_second = calculateThroughputMBPS($nanoseconds_for_16_kib);
    $results[$test_name] = [
        // NOTE: despite its name this entry holds nanoseconds; the key is kept unchanged
        // so the dumped result structure stays the same.
        "microseconds_for_16_kib" => $nanoseconds_for_16_kib,
        "mb_per_second" => $mb_per_second,
    ];
}
// slowest first, fastest last; keys (test names) are preserved
uasort($results, function (array $a, array $b): int {
    return $a["mb_per_second"] <=> $b["mb_per_second"];
});
var_dump($results);
$ time php b3instructions.php
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
./test 999
13876
gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
./test 999
29295
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
./test 999
4969
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
./test 999
4688
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512
./test 999
2384
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S
./test 999
1753
array(6) {
["O2-portable-march"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(29295)
["mb_per_second"]=>
float(533.3674688513398)
}
["O2-portable"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(13876)
["mb_per_second"]=>
float(1126.0449697319111)
}
["O2-sse2"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(4969)
["mb_per_second"]=>
float(3144.4958744214127)
}
["O2-sse41"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(4688)
["mb_per_second"]=>
float(3332.977815699659)
}
["O2-avx2"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(2384)
["mb_per_second"]=>
float(6554.1107382550335)
}
["O2-avx512"]=>
array(2) {
["microseconds_for_16_kib"]=>
int(1753)
["mb_per_second"]=>
float(8913.291500285226)
}
}
real 0m3.017s
user 0m1.913s
sys 0m0.105s
// compile: gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_NEON -DBLAKE3_NO_SSE41
// gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "upstream_blake3/c/blake3.h"
#include <sys/time.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
int64_t nanoseconds()
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return (int64_t)ts.tv_sec * 1000000000 + (int64_t)ts.tv_nsec;
}
/*
 * Benchmark driver: hashes a 16 KiB buffer with BLAKE3 <iterations> times and prints the
 * fastest single run, in nanoseconds, on the first line of stdout (followed by an empty line).
 *
 * Exit status is 0 on success and 1 when the argument is missing or is not a positive integer.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s <iterations>\n", argv[0]);
        return 1;
    }
    /* strtol instead of atoi so that garbage, zero and negative counts are rejected;
       with no iterations the program would otherwise print INT64_MAX as the "best" time */
    char *end = NULL;
    long iterations = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || iterations <= 0)
    {
        fprintf(stderr, "Error: <iterations> must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }
    char teststr[16 * 1024] = "Hello World!"; // 16kb is an important size: it's the size of the TLS record buffer.
    int64_t best = INT64_MAX;
    for (long i = 0; i < iterations; i++)
    {
        int64_t start = nanoseconds();
        blake3_hasher hasher;
        blake3_hasher_init(&hasher);
        blake3_hasher_update(&hasher, teststr, sizeof(teststr));
        uint8_t output[BLAKE3_OUT_LEN];
        blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
        int64_t end_time = nanoseconds();
        int64_t elapsed = end_time - start;
        /* keep the minimum: it is the run least disturbed by scheduling noise */
        if (elapsed < best)
        {
            best = elapsed;
        }
    }
    /* cast to long long: "%ld" does not match int64_t on every platform */
    printf("%lld\n\n", (long long)best);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment