ab5tract/crc-getter-extended.raku

## crc-getter-extended.raku
#!/usr/bin/env raku
use v6.*;

# Entry point; the rest of the file is the body of MAIN. Named options:
#   --runs          number of batches to process before the react block finishes
#   --volume        number of Package objects built up front and sent with each batch
#   --bad-packages  when set, roughly 10% of the packages get an address that
#                   does not match their stored CRC32
unit sub MAIN(:$runs = 5, :$volume = 100, :$bad-packages = False);

use String::CRC32;
use NativeCall;

# Path of the native library that provides the three functions bound below.
constant LIB = "./zig-out/lib/crc";
# Native binding: takes a string and returns its CRC32 as a uint32.
sub hash_crc32(Str) returns uint32 is native(LIB) { * }
# Record with native (C struct) layout, as returned by create_package:
# a CRC32 value followed by an address string.
class Package is repr('CStruct') {
    has uint32 $.crc32;
    has Str $.address is rw;
}
# Native binding: builds a Package from an address and a CRC32 on the native side.
sub create_package(Str, uint32) returns Package is native(LIB) { * }
# Native binding without arguments or return value; this script calls it from
# the END phaser and from the SIGINT handler.
sub teardown() is native(LIB) { * }

# Batches of packages enter the CRC32 stage through this supply.
my $package-supplier = Supplier.new;
my $package-supply   = $package-supplier.Supply;

# Receives [items, duration] once a batch has been checked.
my $output-supplier  = Supplier.new;
my $output-supply    = $output-supplier.Supply;

# Receives every package whose address does not match its CRC32.
my $bad-address-supplier = Supplier.new;
my $bad-address-supply   = $bad-address-supplier.Supply;

# Each value emitted here starts the processing of one batch.
my $ticker-supplier = Supplier.new;
my $package-ticker = $ticker-supplier.Supply;

# Set by the SIGINT handler so that END does not call teardown a second time.
my $interrupted = False;
my $time = now;
my Str $test-address = "01101011 Hyper Drive";
my uint32 $test-crc32 = hash_crc32($test-address);
# A different string than $test-address, so it cannot hash to $test-crc32.
my Str $bad-address = $test-address.succ;

# Work with whole-number counts that always add up to $volume. Passing the
# fractional values ($volume * 0.1 and $volume - $volume * 0.1) straight to
# `xx` truncates each of them separately, which drops a package whenever
# $volume is not a multiple of 10 (e.g. 105 -> 94 + 10 = 104).
my $bad-total  = ($volume * 0.1).Int;
my $good-total = $volume - $bad-total;

# `xx` re-evaluates its left-hand side for every repetition, so each element
# is a separately created package.
my @packages = $bad-packages
    ?? (create_package($test-address, $test-crc32) xx $good-total,
        create_package($bad-address,  $test-crc32) xx $bad-total).flat
    !! create_package($test-address, $test-crc32) xx $volume;
note ">>> INIT: {now - $time}s ($volume objects)";
# Release the native allocations on normal exit; the SIGINT handler does it itself.
END teardown unless $interrupted;

# Every tick sends the complete, pre-built package list through as one batch.
$package-ticker.act: {
    $package-supplier.emit: @packages;
};

# CRC32 stage: recompute the checksum of each address, report mismatches on
# the bad-address supply, then pass the batch and the time it took to the sink.
$package-supply.act: -> @batch {
    my $started = now;
    for @batch -> $package {
        next if $package.crc32 == hash_crc32($package.address);
        $bad-address-supplier.emit($package);
    }
    $output-supplier.emit([@batch, now - $started]);
};

my $count = 0;
my $bad-count = 0;
# Emit the first tick asynchronously after a 1 ms pause, so that the react
# block below has a chance to subscribe before the first batch is produced.
start { sleep 0.001; $ticker-supplier.emit(True); }
react {
    # One message per finished batch: print its duration, then either request
    # the next batch or finish after $runs batches.
    whenever $output-supply -> [@items, $duration] {
        say "Batch #{++$count}: {$duration}s";
        if $count != $runs {
            $ticker-supplier.emit(True);
        }
        else {
            note "<<< $bad-count packages with bad addresses found. Alert the Elves!" if $bad-packages;
            done;
        }
    }

    # Tally every package that failed the CRC32 check.
    whenever $bad-address-supply -> $item {
        # ... send to remediation queue and alert the elves!
        $bad-count++;
    }

    # Ctrl-C: release the native memory here and flag it, so that the END
    # phaser does not call teardown again.
    whenever signal(SIGINT) {
        teardown;
        $interrupted = True;
        note "<<< $bad-count packages with bad addresses found. Alert the Elves!" if $bad-packages;
        done;
    }
}

## crc-getter.raku
#!/usr/bin/env raku
use v6.*;

# Entry point; the rest of the file is the body of MAIN. Named options:
#   --runs          number of batches to process before the react block finishes
#   --volume        number of Package objects built up front and sent with each batch
#   --bad-packages  when set, roughly 10% of the packages get an address that
#                   does not match their stored CRC32
unit sub MAIN(:$runs = 5, :$volume = 100, :$bad-packages = False);

use String::CRC32;
use NativeCall;

# Reference address and its CRC32; both serve as attribute defaults of Package.
my $address = "01101011 Hyper Drive";
my $crc32 = String::CRC32::crc32($address);
# A package to validate: an address plus the CRC32 that address is expected to have.
class Package {
    has Str $.address is rw = $address;
    has uint32 $.crc32 = $crc32;
}

# Simulating the traffic from our eventual input, a partitioned Candycane™ queue
my $package-supplier = Supplier.new;
my $package-supply   = $package-supplier.Supply;
# A dummy sink that ignores the data and prints the processing duration of the CRC32 stage
my $output-supplier  = Supplier.new;
my $output-supply    = $output-supplier.Supply;
# Any address that fails the CRC32 test goes through here
my $bad-address-supplier = Supplier.new;
my $bad-address-supply   = $bad-address-supplier.Supply;
# A tick begins processing a new batch
my $tick-supplier  = Supplier.new;
my $package-ticker = $tick-supplier.Supply;

my $time = now;
# A different string than $address, so it cannot hash to the default $crc32.
my $bad-address = $address.succ;

# Work with whole-number counts that always add up to $volume. Passing the
# fractional values ($volume * 0.1 and $volume - $volume * 0.1) straight to
# `xx` truncates each of them separately, which drops a package whenever
# $volume is not a multiple of 10 (e.g. 105 -> 94 + 10 = 104).
my $bad-total  = ($volume * 0.1).Int;
my $good-total = $volume - $bad-total;

# `xx` re-evaluates its left-hand side for every repetition, so each element
# is a separate Package instance.
my @packages = $bad-packages
    ?? (Package.new xx $good-total,
        Package.new(:address($bad-address)) xx $bad-total).flat
    !! Package.new xx $volume;
note ">>> INIT: {now - $time}s ($volume objects)";

# Every tick sends the complete, pre-built package list through as one batch.
$package-ticker.act: {
    $package-supplier.emit: @packages;
};

# CRC32 stage: recompute the checksum of each address, report mismatches on
# the bad-address supply, then pass the batch and the time it took to the sink.
$package-supply.act: -> @batch {
    my $started = now;
    for @batch -> $package {
        next if $package.crc32 == String::CRC32::crc32($package.address);
        $bad-address-supplier.emit($package);
    }
    $output-supplier.emit([@batch, now - $started]);
};

my $count = 0;
my $bad-count = 0;
# Emit the first tick asynchronously after a 1 ms pause, so that the react
# block below has a chance to subscribe before the first batch is produced.
start { sleep 0.001; $tick-supplier.emit(True); }
react {
    # One message per finished batch: print its duration, then either finish
    # after $runs batches or request the next batch.
    whenever $output-supply -> [@items, $duration] {
        say "RUN {++$count}: {$duration}s";
        if $count == $runs {
            note "<<< $bad-count packages with bad addresses found. Alert the Elves!" if $bad-packages;
            done();
        }
        else {
            $tick-supplier.emit(True);
        }
    }

    # Tally every package that failed the CRC32 check.
    whenever $bad-address-supply -> $item {
        # ... send to remediation queue and alert the elves!
        $bad-count++;
    }
}

## main.zig
const std = @import("std");
const testing = std.testing;
const Crc32 = std.hash.crc.Crc32;
const span = std.mem.span;

// Backing allocator for both the arena and the package pool below.
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
// Arena that create_package uses for the copied address strings; teardown
// releases it as a whole.
var arena = std.heap.ArenaAllocator.init(allocator);
const aa = arena.allocator();

// Record with C-compatible layout (extern struct) that create_package hands
// back to its caller: a CRC32 value and a NUL-terminated address string.
pub const Package = extern struct {
    crc32: u32,
    address: [*:0]const u8,
};

// Growable memory pool from which create_package takes its Package slots.
const package_pool = std.heap.MemoryPoolExtra(Package, .{ .growable = true });
var pool = package_pool.init(allocator);

/// Exported for C callers: returns the CRC32 of the NUL-terminated byte
/// string `string` (the terminator itself is not hashed).
export fn hash_crc32(string: [*:0]const u8) u32 {
    const bytes = span(string);
    return Crc32.hash(bytes);
}

/// Exported for C callers: takes a Package slot from the pool, stores `crc32`
/// in it together with a private, arena-owned copy of `address`, and returns
/// a pointer to the slot. Panics if either allocation fails. The memory stays
/// valid until teardown() is called.
export fn create_package(address: [*:0]const u8, crc32: u32) *Package {
    const slot = pool.create() catch @panic("No more memory to allocate for packages!");
    // Copy the string: the caller's buffer is not ours to keep.
    const owned_address: [*:0]const u8 = aa.dupeZ(u8, span(address)) catch @panic("Could not create address string");
    slot.* = .{ .crc32 = crc32, .address = owned_address };
    return slot;
}

/// Exported for C callers: releases the address-string arena and the package
/// pool. Pointers previously returned by create_package must not be used
/// afterwards.
export fn teardown() void {
    arena.deinit();
    pool.deinit();
}

// Checks hash_crc32 against a known CRC32 value and verifies that
// create_package stores both the checksum and a faithful copy of the address.
// expectEqual / expectEqualStrings are used so that a failure reports the
// differing values instead of just "expected true".
test "hash_crc32 and create_package round-trip" {
    const test_string: [*:0]const u8 = "abcd";
    try testing.expectEqual(@as(u32, 3984772369), hash_crc32(test_string));

    const test_address: [*:0]const u8 = "222 Moon Roof Blvd";
    const test_crc32: u32 = hash_crc32(test_address);
    const test_package: *Package = create_package(test_address, test_crc32);
    // Free the pool and arena even when one of the expectations below fails.
    defer teardown();

    try testing.expectEqual(test_crc32, test_package.crc32);
    try testing.expectEqualStrings(span(test_address), span(test_package.address));
}

## run-notes..md

      
    Raw
  

              run-notes..md
            
          
    Benchmarking Raku/Zig integrations

Introduction

This research was conducted while preparing an upcoming Raku Advent Calendar post. The Raku code uses a basic supply pipeline to feed $volume objects through a validation stage that requires a CRC32 check before going to the output sink, which prints the processing time of the validation stage.
The "reaction graph" is designed to simulate a stream processing flow, where inputs arrive and depart via Candycane™ queues (that's the name of Santa's Workshop Software's queueing service, in case you weren't familiar).
The entire scenario is contrived in that CRC32 was chosen due to native implementation availability in both Raku and Zig, allowing comparison. It's not an endorsement of using CRC32 in address validation to deliver Santa's, or anyone's, packages.
Also, thanks to the very helpful folks at ziggit.dev for answering my newbie question in depth.
Methodology

The source code:

Raku - crc-getter.raku
Raku+Zig - crc-getter-extended.raku, main.zig

At larger volumes, Raku struggles with the initialization speed of the $volume objects that are instantiated. I replaced the native Raku class with one written in Zig, using the is repr('CStruct') trait in Raku and the extern struct qualifier in Zig.
In Zig I use a combination of an arena allocator (for the string passed from Raku) and a memory pool (designed to quickly make copies of a single type, exactly fitting our use case) to construct Package objects.
A --bad-packages option is provided by both Raku scripts, which makes 10% of the objects have a mismatched address/CRC32 pair.
The library tested was compiled with -Doptimize=ReleaseFast.
Batches are repeated $runs times, which defaults to 5.
All results from an M2 MacBook Pro.
Caveats

This test and its results are only intended to reflect the case where an object is constructed in Zig based on input from Raku. It is not intended to be a test of Zig's native speed in the creation of structs.
There is a call to sleep that gives time -- 0.001 seconds --  to get the react block up and running before emitting the first True on the $ticker-supplier. This affects overall runtime but not the batch or initialization metrics.
The speed of Raku+Zig was so fast that the tool used to measure these details (cmdbench) could not find results in ps for the execution because it had already finished. These are marked as Unmeasured.
In the next iteration of this research, there should be two additional entries in the data tables below for:

Raku+Zig: Raku-managed objects / Zig crc32
Raku+Zig: Zig-managed objects / Raku crc32

Results

10,000


Volume
Edition
Runtime
Batch Time
Initialization
Max bytes


10,000
Raku
1.072s
1: 0.146596686s
2: 0.138983732s
3: 0.142380065s
4: 0.136050775s
5: 0.134760525s
0.008991746s
180240384


10,000
Raku+Zig
0.44s
1: 0.010978411s
2: 0.006575705s
3: 0.004145623s
4: 0.004280415s
5: 0.00468929s
0.020358033s
Unmeasured


10,000
Raku
(bad-packages)
1.112s
1: 0.157788932s
2: 0.149544686s
3: 0.156293433s
4: 0.151365477s
5: 0.147947436s
0.008059955s
196263936


10,000
Raku+Zig
(bad-packages)
0.463s
1: 0.031300276s
2: 0.01006562s
3: 0.010693328s
4: 0.011056994s
5: 0.010770828s
0.010954495s
Unmeasured


Notes

The Raku+Zig solution wins in performance, but loses the initialization race. Raku is doing a decent showing in comparison to how far it has come performance-wise.
100,000


Volume
Edition
Overall
Batch Time
Initialization
Max bytes


100,000
Raku
7.163s
1: 1.360029456s
2: 1.32534014s
3: 1.353072834s
4: 1.346668338s
5: 1.351110502s
0.062402473s
210173952


100,000
Raku+Zig
0.75s
1: 0.079802007s
2: 0.073638176s
3: 0.053291894s
4: 0.05087652s
5: 0.050394687s
0.05855585s
241205248


100,000
Raku
(bad-packages)
7.89s
1: 1.496982355s
2: 1.484494027s
3: 1.497365023s
4: 1.490810525s
5: 1.492416774s
0.060026016s
209403904


100,000
Raku+Zig
(bad-packages)
1.076s
1: 0.16960934s
2: 0.111172493s
3: 0.110844786s
4: 0.113021202s
5: 0.111713535s
0.051436311s
242450432


Notes

We start to see something strange going on with the timing of the first batch of processing in the Raku+Zig implementation, where it is around double the duration of the rest of the batches.
Other than that, we see Raku+Zig take first place in everything but memory consumption, which we can assume is a function of using the NativeCall bridge, not to mention my new-ness as a Zig programmer.
1,000,000


Volume
Edition
Overall
Batch Time
Initialization
Max bytes


1,000,000
Raku
68.081s
1: 13.475302627s
2: 13.161153845s
3: 13.293998956s
4: 13.364662217s
5: 13.474755295s
0.95481884s
417103872


1,000,000
Raku+Zig
3.758s
1: 0.788083286s
2: 0.509883905s
3: 0.492898873s
4: 0.500868284s
5: 0.498677495s
0.575087671s
514064384


1,000,000
Raku
(bad-packages)
75.796s
1: 14.940173822s
2: 14.632683637s
3: 14.866796226s
4: 15.272903792s
5: 15.027481448s
0.704549212s
396656640


1,000,000
Raku+Zig
(bad-packages)
6.553s
1: 1.362189763s
2: 1.061496504s
3: 1.069134685s
4: 1.062746049s
5: 1.061096044s
0.528011288s
462766080


Notes

Raku's native CRC32 performance is clearly lagging here. Raku+Zig keeps its domination except in the realm of memory usage. It would be hard to justify using the Raku native version strictly on its reduced memory usage, considering the performance advantage on display here.
The "slow first batch" problem continues to affect Raku+Zig. Running with bad-packages enabled slows down the Raku+Zig crc32 loop, hinting that there might be some optimizations on either the Raku or the Zig/clang side of things that can't kick in when the looped data is heterogeneous.
10,000,000


Volume
Edition
Runtime
Batch Time
Initialization
Max bytes


10,000,000
Raku
704.852s
1: 136.588638184s
2: 136.851019628s
3: 138.44696743s
4: 139.777040922s
5: 139.490784317s
13.299274221s
2055012352


10,000,000
Raku+Zig
38.505s
1: 8.843459877s
2: 4.84300835s
3: 4.991842433s
4: 5.077245603s
5: 4.939533707s
9.375436134s
2881126400


10,000,000
Raku
(bad-packages)
792.1s
1: 162.333803401s
2: 174.815386318s
3: 168.299796081s
4: 162.643428135s
5: 163.205406678s
10.252639311s
2124267520


10,000,000
Raku+Zig
(bad-packages)
65.174s
1: 14.41616445s
2: 11.078961309s
3: 10.662389991s
4: 11.20240076s
5: 10.614430063s
6.778600235s
2861596672


Notes

Pure Raku really struggles with a volume of this order of magnitude. But if you add in just a little bit of Zig, you can reasonably supercharge Raku's capabilities.
The "slow first batch" for Raku+Zig has been appearing in more understated forms in other tests. Here the first batch is over double the runtime of the second batch. What is causing this?
100,000,000

This doesn't seem to work. At least, I'm not patient enough. The process seems to stall, growing and shrinking memory but never finishing.
Final Thoughts

This is a preliminary report in blog post form based on a contrived code sample written for another, entirely different blog post. More data and deeper analysis will have to come later.
Zig's C ABI compatibility is clearly no put on. It works seamlessly with Raku's NativeCall. Granted, we haven't really pushed the boundaries of what the C ABI can look like but one of the core takeaways is actually that with Zig we can design that interface. In other words, we are in charge of how ugly, or not, it gets. Considering how dead simple the extern struct <-> is repr('CStruct') support is, I don't think the function signatures need to get nearly as gnarly as they get in C.
Sussing the truth of that supposition out will take some time and effort in learning Zig. I'm looking forward to it. My first stop will probably be a JSON library that uses Zig. I'm also going to be looking into using Zig as the compiler for Rakudo, as it might simplify our releases significantly.
	#!/usr/bin/env raku
	use v6.*;

	# Entry point; the rest of the script is the body of MAIN. Named options:
	#   --runs          number of batches to process before the react block finishes
	#   --volume        number of Package objects built up front and sent with each batch
	#   --bad-packages  when set, roughly 10% of the packages get an address that
	#                   does not match their stored CRC32
	unit sub MAIN(:$runs = 5, :$volume = 100, :$bad-packages = False);

	use String::CRC32;
	use NativeCall;

	# Path of the native library that provides the three functions bound below.
	constant LIB = "./zig-out/lib/crc";
	# Native binding: takes a string and returns its CRC32 as a uint32.
	sub hash_crc32(Str) returns uint32 is native(LIB) { * }
	# Record with native (C struct) layout, as returned by create_package:
	# a CRC32 value followed by an address string.
	class Package is repr('CStruct') {
	has uint32 $.crc32;
	has Str $.address is rw;
	}
	# Native binding: builds a Package from an address and a CRC32 on the native side.
	sub create_package(Str, uint32) returns Package is native(LIB) { * }
	# Native binding without arguments or return value; this script calls it from
	# the END phaser and from the SIGINT handler.
	sub teardown() is native(LIB) { * }

	# Batches of packages enter the CRC32 stage through this supply.
	my $package-supplier = Supplier.new;
	my $package-supply = $package-supplier.Supply;

	# Receives [items, duration] once a batch has been checked.
	my $output-supplier = Supplier.new;
	my $output-supply = $output-supplier.Supply;

	# Receives every package whose address does not match its CRC32.
	my $bad-address-supplier = Supplier.new;
	my $bad-address-supply = $bad-address-supplier.Supply;

	# Each value emitted here starts the processing of one batch.
	my $ticker-supplier = Supplier.new;
	my $package-ticker = $ticker-supplier.Supply;

	# Set by the SIGINT handler so that END does not call teardown a second time.
	my $interrupted = False;
	my $time = now;
	my Str $test-address = "01101011 Hyper Drive";
	my uint32 $test-crc32 = hash_crc32($test-address);
	# A different string than $test-address, so it cannot hash to $test-crc32.
	my Str $bad-address = $test-address.succ;

	# Work with whole-number counts that always add up to $volume. Passing the
	# fractional values ($volume * 0.1 and $volume - $volume * 0.1) straight to
	# `xx` truncates each of them separately, which drops a package whenever
	# $volume is not a multiple of 10 (e.g. 105 -> 94 + 10 = 104).
	my $bad-total  = ($volume * 0.1).Int;
	my $good-total = $volume - $bad-total;

	# `xx` re-evaluates its left-hand side for every repetition, so each element
	# is a separately created package.
	my @packages = $bad-packages
	    ?? (create_package($test-address, $test-crc32) xx $good-total,
	        create_package($bad-address,  $test-crc32) xx $bad-total).flat
	    !! create_package($test-address, $test-crc32) xx $volume;
	note ">>> INIT: {now - $time}s ($volume objects)";
	# Release the native allocations on normal exit; the SIGINT handler does it itself.
	END teardown unless $interrupted;

	# Every tick sends the complete, pre-built package list through as one batch.
	$package-ticker.act: {
	    $package-supplier.emit: @packages;
	};

	# CRC32 stage: recompute the checksum of each address, report mismatches on
	# the bad-address supply, then pass the batch and the time it took to the sink.
	$package-supply.act: -> @batch {
	    my $started = now;
	    for @batch -> $package {
	        next if $package.crc32 == hash_crc32($package.address);
	        $bad-address-supplier.emit($package);
	    }
	    $output-supplier.emit([@batch, now - $started]);
	};

	my $count = 0;
	my $bad-count = 0;
	# Emit the first tick asynchronously after a 1 ms pause, so that the react
	# block below has a chance to subscribe before the first batch is produced.
	start { sleep 0.001; $ticker-supplier.emit(True); }
	react {
	    # One message per finished batch: print its duration, then either request
	    # the next batch or finish after $runs batches.
	    whenever $output-supply -> [@items, $duration] {
	        say "Batch #{++$count}: {$duration}s";
	        if $count != $runs {
	            $ticker-supplier.emit(True);
	        }
	        else {
	            note "<<< $bad-count packages with bad addresses found. Alert the Elves!" if $bad-packages;
	            done;
	        }
	    }

	    # Tally every package that failed the CRC32 check.
	    whenever $bad-address-supply -> $item {
	        # ... send to remediation queue and alert the elves!
	        $bad-count++;
	    }

	    # Ctrl-C: release the native memory here and flag it, so that the END
	    # phaser does not call teardown again.
	    whenever signal(SIGINT) {
	        teardown;
	        $interrupted = True;
	        note "<<< $bad-count packages with bad addresses found. Alert the Elves!" if $bad-packages;
	        done;
	    }
	}
	const std = @import("std");
	const testing = std.testing;
	const Crc32 = std.hash.crc.Crc32;
	const span = std.mem.span;

	// Backing allocator for both the arena and the package pool below.
	var gpa = std.heap.GeneralPurposeAllocator(.{}){};
	const allocator = gpa.allocator();
	// Arena that create_package uses for the copied address strings; teardown
	// releases it as a whole.
	var arena = std.heap.ArenaAllocator.init(allocator);
	const aa = arena.allocator();

	// Record with C-compatible layout (extern struct) that create_package hands
	// back to its caller: a CRC32 value and a NUL-terminated address string.
	pub const Package = extern struct {
	crc32: u32,
	address: [*:0]const u8,
	};

	// Growable memory pool from which create_package takes its Package slots.
	const package_pool = std.heap.MemoryPoolExtra(Package, .{ .growable = true });
	var pool = package_pool.init(allocator);

	/// Exported for C callers: returns the CRC32 of the NUL-terminated byte
	/// string `string` (the terminator itself is not hashed).
	export fn hash_crc32(string: [*:0]const u8) u32 {
	    const bytes = span(string);
	    return Crc32.hash(bytes);
	}

	/// Exported for C callers: takes a Package slot from the pool, stores `crc32`
	/// in it together with a private, arena-owned copy of `address`, and returns
	/// a pointer to the slot. Panics if either allocation fails. The memory stays
	/// valid until teardown() is called.
	///
	/// The parameter is a NUL-terminated many-item pointer and the result is a
	/// pointer: the body returns the `*Package` obtained from the pool, and the
	/// test in this listing binds the result to a `*Package`.
	export fn create_package(address: [*:0]const u8, crc32: u32) *Package {
	    const package = pool.create() catch @panic("No more memory to allocate for packages!");
	    // Copy the string: the caller's buffer is not ours to keep.
	    const address_copy: [*:0]const u8 = aa.dupeZ(u8, span(address)) catch @panic("Could not create address string");
	    package.* = .{
	        .address = address_copy,
	        .crc32 = crc32,
	    };
	    return package;
	}

	/// Exported for C callers: releases the address-string arena and the package
	/// pool. Pointers previously returned by create_package must not be used
	/// afterwards.
	export fn teardown() void {
	arena.deinit();
	pool.deinit();
	}

	// Checks hash_crc32 against a known CRC32 value and verifies that
	// create_package stores both the checksum and a faithful copy of the address.
	// expectEqual / expectEqualStrings are used so that a failure reports the
	// differing values instead of just "expected true".
	test "hash_crc32 and create_package round-trip" {
	    const test_string: [*:0]const u8 = "abcd";
	    try testing.expectEqual(@as(u32, 3984772369), hash_crc32(test_string));

	    const test_address: [*:0]const u8 = "222 Moon Roof Blvd";
	    const test_crc32: u32 = hash_crc32(test_address);
	    const test_package: *Package = create_package(test_address, test_crc32);
	    // Free the pool and arena even when one of the expectations below fails.
	    defer teardown();

	    try testing.expectEqual(test_crc32, test_package.crc32);
	    try testing.expectEqualStrings(span(test_address), span(test_package.address));
	}
Volume	Edition	Runtime	Batch Time	Initialization	Max bytes
10,000	Raku	1.072s	1: 0.146596686s 2: 0.138983732s 3: 0.142380065s 4: 0.136050775s 5: 0.134760525s	0.008991746s	180240384
10,000	Raku+Zig	0.44s	1: 0.010978411s 2: 0.006575705s 3: 0.004145623s 4: 0.004280415s 5: 0.00468929s	0.020358033s	`Unmeasured`
10,000	Raku (`bad-packages`)	1.112s	1: 0.157788932s 2: 0.149544686s 3: 0.156293433s 4: 0.151365477s 5: 0.147947436s	0.008059955s	196263936
10,000	Raku+Zig (`bad-packages`)	0.463s	1: 0.031300276s 2: 0.01006562s 3: 0.010693328s 4: 0.011056994s 5: 0.010770828s	0.010954495s	`Unmeasured`
Volume	Edition	Overall	Batch Time	Initialization	Max bytes
100,000	Raku	7.163s	1: 1.360029456s 2: 1.32534014s 3: 1.353072834s 4: 1.346668338s 5: 1.351110502s	0.062402473s	210173952
100,000	Raku+Zig	0.75s	1: 0.079802007s 2: 0.073638176s 3: 0.053291894s 4: 0.05087652s 5: 0.050394687s	0.05855585s	241205248
100,000	Raku (`bad-packages`)	7.89s	1: 1.496982355s 2: 1.484494027s 3: 1.497365023s 4: 1.490810525s 5: 1.492416774s	0.060026016s	209403904
100,000	Raku+Zig (`bad-packages`)	1.076s	1: 0.16960934s 2: 0.111172493s 3: 0.110844786s 4: 0.113021202s 5: 0.111713535s	0.051436311s	242450432
Volume	Edition	Overall	Batch Time	Initialization	Max bytes
1,000,000	Raku	68.081s	1: 13.475302627s 2: 13.161153845s 3: 13.293998956s 4: 13.364662217s 5: 13.474755295s	0.95481884s	417103872
1,000,000	Raku+Zig	3.758s	1: 0.788083286s 2: 0.509883905s 3: 0.492898873s 4: 0.500868284s 5: 0.498677495s	0.575087671s	514064384
1,000,000	Raku (`bad-packages`)	75.796s	1: 14.940173822s 2: 14.632683637s 3: 14.866796226s 4: 15.272903792s 5: 15.027481448s	0.704549212s	396656640
1,000,000	Raku+Zig (`bad-packages`)	6.553s	1: 1.362189763s 2: 1.061496504s 3: 1.069134685s 4: 1.062746049s 5: 1.061096044s	0.528011288s	462766080
Volume	Edition	Runtime	Batch Time	Initialization	Max bytes
10,000,000	Raku	704.852s	1: 136.588638184s 2: 136.851019628s 3: 138.44696743s 4: 139.777040922s 5: 139.490784317s	13.299274221s	2055012352
10,000,000	Raku+Zig	38.505s	1: 8.843459877s 2: 4.84300835s 3: 4.991842433s 4: 5.077245603s 5: 4.939533707s	9.375436134s	2881126400
10,000,000	Raku (`bad-packages`)	792.1s	1: 162.333803401s 2: 174.815386318s 3: 168.299796081s 4: 162.643428135s 5: 163.205406678s	10.252639311s	2124267520
10,000,000	Raku+Zig (`bad-packages`)	65.174s	1: 14.41616445s 2: 11.078961309s 3: 10.662389991s 4: 11.20240076s 5: 10.614430063s	6.778600235s	2861596672