kristoff-it/zigcat.zig

## zigcat.zig
const std = @import("std");

pub const io_mode = .evented;

// Loris: This is an unfortunate combination of traps that you stumbled upon.
//        Reader / Writer interfaces are generic (ie the type changes based on the underlying stream).
//        In Zig we use the term "interface" very broadly to refer to anything that fulfills a predetermined API.
//        This is very different from, say, Go, where "interface" is a concrete feature of the language that
//        only offers one specific way of doing runtime polymorphism. Going back to Zig, the problem here is
//        that stdin and stdout are `std.fs.File` structs, while the socket is a `std.net.Stream`, which means
//        that their Reader / Writer types will be different. This is what led you ultimately to put `anytype`
//        in `ArgStruct`, but I can tell you that you don't need this struct at all because you can pass to
//        `runDetached` normal functions with multiple arguments. I'll show down below how to do that.

// const ArgStruct = struct {
//     // Apparently, the following two lines do not compile because of https://github.com/ziglang/zig/issues/10184
//     // (also see https://github.com/ziglang/zig/pull/4567 and https://github.com/ziglang/zig/issues/1268)
//     // in: *@TypeOf(std.io.Reader),
//     // out: *@TypeOf(std.io.Writer),
//     // Apparently, the following two lines do not compile because of https://github.com/ziglang/zig/issues/5877
//     in: anytype,
//     out: anytype,
//     buf: []u8,
//     loop: std.event.Loop,
//     alloc: std.mem.Allocator,
// };

/// Entry point: connects over TCP to the host and port given on the command line,
/// then forwards stdin to the socket and the socket to stdout concurrently.
///
/// Expected arguments (after the executable name):
///   1. hostname to connect to
///   2. TCP port, parsed as a base-10 `u16`
///
/// Returns `error.InvalidArgs` when either argument is missing, and propagates any
/// error from argument iteration, port parsing, connecting, or either forwarding task.
pub fn main() !void {
    // Shortcut to standard input and output
    const stdin = std.io.getStdIn().reader();
    const stdout = std.io.getStdOut().writer();

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // Declared first so it runs last: every allocation below is released before the
    // allocator is torn down and gets a chance to report leaks.
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    // Create iterator on argument vector
    var arg_it = std.process.args();
    defer arg_it.deinit();
    _ = arg_it.skip(); // skip name of executable

    // Loris: I'm not exactly clear with what kind of error you encountered originally, but this works
    //        without requiring workarounds. As an unrelated comment, just to provide some information,
    //        this API requires an allocator only because of Windows. Windows uses UTF16 strings which
    //        nobody ever wants to use, so this "general" API creates a UTF8 copy of each string for
    //        your convenience. The allocator is unused otherwise.
    //        The parentheses are required to make `try` (error unwrap) happen before `orelse` (optional unwrap).
    const hostname = (try arg_it.next(alloc)) orelse {
        std.debug.print("Expected first argument to be the hostname\n", .{});
        return error.InvalidArgs;
    };
    // The iterator hands back memory obtained from `alloc`; release it when main exits.
    defer alloc.free(hostname);

    const port = (try arg_it.next(alloc)) orelse {
        std.debug.print("Expected second argument to be the port\n", .{});
        return error.InvalidArgs;
    };
    defer alloc.free(port);
    const port_number = try std.fmt.parseInt(u16, port, 10);

    const sock_stream = try std.net.tcpConnectToHost(alloc, hostname, port_number);
    // Close the connection on every exit path (success or error).
    defer sock_stream.close();
    const net_in = sock_stream.reader();
    const net_out = sock_stream.writer();

    // Loris: The rest of this code needs to be a bit different. First of all, by using `io_mode = .evented`,
    //        Zig will spin up the event loop for you, so you don't need to do it manually.
    //        Secondly, we're going to use a read-buffer-write implementation that the stdlib has, which also
    //        performs these read-write operations in a loop, which means we won't need to have the two tasks
    //        re-schedule themselves.
    //        Lastly, we're not going to use `runDetached`. `runDetached` is meant to be used in situations where
    //        you need to have a dynamic number of independent threads of execution to happen concurrently, like in a
    //        web server that spawns an async task for every request it receives. In our case we only need 2
    //        independent threads of execution, so we can use async / await.

    // var buf1: [4096]u8 = undefined;
    // var buf2: [4096]u8 = undefined;
    // var loop: std.event.Loop = undefined;
    // try loop.initMultiThreaded();
    // defer loop.deinit();

    // Loris: Even if we're not going to use `runDetached`, I've changed the signature of `readAndForward` to "splat" the
    //        arguments that were previously all put inside a struct. This is how you would invoke it now:
    //
    //        try loop.runDetached(alloc, readAndForward, .{ stdin, netOut });
    //        try loop.runDetached(alloc, readAndForward, .{ netIn, stdout });
    //

    // try loop.runDetached(alloc, readAndForward, ArgStruct{ .in = netIn, .out = stdout, .buf = &buf1, .loop = loop, .allocator = alloc });
    // try loop.runDetached(alloc, readAndForward, ArgStruct{ .in = &stdin, .out = &netOut, .buf = &buf2, .loop = loop, .allocator = alloc });

    // loop.run();

    // The frames stay `var`: they are awaited below.
    var in_to_out = async readAndForward(stdin, net_out);
    var out_to_in = async readAndForward(net_in, stdout);

    // Loris: the order in which we await each async frame doesn't matter from the perspective of concurrency,
    //        both tasks were already kicked off when we called each function.
    try await in_to_out;
    try await out_to_in;

    // Loris: ** CAVEATS **
    //        So, this works, but it's far from perfect. First of all you probably would want to see if you
    //        can find OS-specific APIs that could perform this piping directly from kernel space to avoid
    //        copying bytes at all. Secondly, this current code relies on the stdlib event loop which is
    //        just a proof of concept implementation at the moment which, among other things, has the problem
    //        that right now reading and writing in async mode from the same file descriptor at the same time
    //        doesn't work correctly on Linux. Why? Because epoll, the kernel API that powers the event loop
    //        on Linux, doesn't support registering two separate event filters (one for read, one for write) on
    //        the same file descriptor. We need to implement a workaround for that, but haven't done so yet.
    //        That said, we have recently got a new core team member who is very keen to work on the event loop
    //        and hopefully we should have something better in the near future.
    //
    //        So how do you make it work in the meantime? Given how simple it is, you could replace the two async
    //        calls with two threads, disable evented I/O, since you don't really need it much, and there you go.
    //        If instead you're more interested in learning how to do async/await in Zig, I had a lot of fun
    //        writing a Redis client for Zig, so I highly recommend it.
}

// // See https://github.com/ziglang/zig/issues/6515
// fn getNextArg(arg_it: *std.process.ArgIterator, allocator: std.mem.Allocator) ?anyerror![]u8 {
//     var arg = arg_it.next(allocator);
//     return @as(?anyerror![]u8, arg);
// }

/// Copies everything `reader` yields into `writer`, staging the bytes in a
/// page-sized stack buffer. Any read or write error is returned to the caller.
fn readAndForward(reader: anytype, writer: anytype) !void {
    // Loris: see https://github.com/ziglang/zig/blob/master/lib/std/fifo.zig
    const Fifo = std.fifo.LinearFifo(u8, .Slice);

    // Backing storage for the FIFO; it is only ever accessed through `pipe`.
    var scratch: [std.mem.page_size]u8 = undefined;
    var pipe = Fifo.init(&scratch);
    defer pipe.deinit();

    // `pump` runs the read/write loop for us.
    return pipe.pump(reader, writer);
}
	const std = @import("std");

	pub const io_mode = .evented;

	// Loris: This is an unfortunate combination of traps that you stumbled upon.
	// Reader / Writer interfaces are generic (ie the type changes based on the underlying stream).
	// In Zig we use the term "interface" very broadly to refer to anything that fulfills a predetermined API.
	// This is very different from, say, Go, where "interface" is a concrete feature of the language that
	// only offers one specific way of doing runtime polymorphism. Going back to Zig, the problem here is
	// that stdin and stdout are `std.fs.File` structs, while the socket is a `std.net.Stream`, which means
	// that their Reader / Writer types will be different. This is what led you ultimately to put `anytype`
	// in `ArgStruct`, but I can tell you that you don't need this struct at all because you can pass to
	// `runDetached` normal functions with multiple arguments. I'll show down below how to do that.

	// const ArgStruct = struct {
	// // Apparently, the following two lines do not compile because of https://github.com/ziglang/zig/issues/10184
	// // (also see https://github.com/ziglang/zig/pull/4567 and https://github.com/ziglang/zig/issues/1268)
	// // in: *@TypeOf(std.io.Reader),
	// // out: *@TypeOf(std.io.Writer),
	// // Apparently, the following two lines do not compile because of https://github.com/ziglang/zig/issues/5877
	// in: anytype,
	// out: anytype,
	// buf: []u8,
	// loop: std.event.Loop,
	// alloc: std.mem.Allocator,
	// };

	/// Entry point: connects over TCP to the host and port given on the command line,
	/// then forwards stdin to the socket and the socket to stdout concurrently.
	///
	/// Expected arguments (after the executable name):
	///   1. hostname to connect to
	///   2. TCP port, parsed as a base-10 `u16`
	///
	/// Returns `error.InvalidArgs` when either argument is missing, and propagates any
	/// error from argument iteration, port parsing, connecting, or either forwarding task.
	pub fn main() !void {
		// Shortcut to standard input and output
		const stdin = std.io.getStdIn().reader();
		const stdout = std.io.getStdOut().writer();

		var gpa = std.heap.GeneralPurposeAllocator(.{}){};
		// Declared first so it runs last: every allocation below is released before the
		// allocator is torn down and gets a chance to report leaks.
		defer _ = gpa.deinit();
		const alloc = gpa.allocator();

		// Create iterator on argument vector
		var arg_it = std.process.args();
		defer arg_it.deinit();
		_ = arg_it.skip(); // skip name of executable

		// Loris: I'm not exactly clear with what kind of error you encountered originally, but this works
		// without requiring workarounds. As an unrelated comment, just to provide some information,
		// this API requires an allocator only because of Windows. Windows uses UTF16 strings which
		// nobody ever wants to use, so this "general" API creates a UTF8 copy of each string for
		// your convenience. The allocator is unused otherwise.
		// The parentheses are required to make `try` (error unwrap) happen before `orelse` (optional unwrap).
		const hostname = (try arg_it.next(alloc)) orelse {
			std.debug.print("Expected first argument to be the hostname\n", .{});
			return error.InvalidArgs;
		};
		// The iterator hands back memory obtained from `alloc`; release it when main exits.
		defer alloc.free(hostname);

		const port = (try arg_it.next(alloc)) orelse {
			std.debug.print("Expected second argument to be the port\n", .{});
			return error.InvalidArgs;
		};
		defer alloc.free(port);
		const port_number = try std.fmt.parseInt(u16, port, 10);

		const sock_stream = try std.net.tcpConnectToHost(alloc, hostname, port_number);
		// Close the connection on every exit path (success or error).
		defer sock_stream.close();
		const net_in = sock_stream.reader();
		const net_out = sock_stream.writer();

		// Loris: The rest of this code needs to be a bit different. First of all, by using `io_mode = .evented`,
		// Zig will spin up the event loop for you, so you don't need to do it manually.
		// Secondly, we're going to use a read-buffer-write implementation that the stdlib has, which also
		// performs these read-write operations in a loop, which means we won't need to have the two tasks
		// re-schedule themselves.
		// Lastly, we're not going to use `runDetached`. `runDetached` is meant to be used in situations where
		// you need to have a dynamic number of independent threads of execution to happen concurrently, like in a
		// web server that spawns an async task for every request it receives. In our case we only need 2
		// independent threads of execution, so we can use async / await.

		// var buf1: [4096]u8 = undefined;
		// var buf2: [4096]u8 = undefined;
		// var loop: std.event.Loop = undefined;
		// try loop.initMultiThreaded();
		// defer loop.deinit();

		// Loris: Even if we're not going to use `runDetached`, I've changed the signature of `readAndForward` to "splat" the
		// arguments that were previously all put inside a struct. This is how you would invoke it now:
		//
		// try loop.runDetached(alloc, readAndForward, .{ stdin, netOut });
		// try loop.runDetached(alloc, readAndForward, .{ netIn, stdout });
		//

		// try loop.runDetached(alloc, readAndForward, ArgStruct{ .in = netIn, .out = stdout, .buf = &buf1, .loop = loop, .allocator = alloc });
		// try loop.runDetached(alloc, readAndForward, ArgStruct{ .in = &stdin, .out = &netOut, .buf = &buf2, .loop = loop, .allocator = alloc });

		// loop.run();

		// The frames stay `var`: they are awaited below.
		var in_to_out = async readAndForward(stdin, net_out);
		var out_to_in = async readAndForward(net_in, stdout);

		// Loris: the order in which we await each async frame doesn't matter from the perspective of concurrency,
		// both tasks were already kicked off when we called each function.
		try await in_to_out;
		try await out_to_in;

		// Loris: CAVEATS
		// So, this works, but it's far from perfect. First of all you probably would want to see if you
		// can find OS-specific APIs that could perform this piping directly from kernel space to avoid
		// copying bytes at all. Secondly, this current code relies on the stdlib event loop which is
		// just a proof of concept implementation at the moment which, among other things, has the problem
		// that right now reading and writing in async mode from the same file descriptor at the same time
		// doesn't work correctly on Linux. Why? Because epoll, the kernel API that powers the event loop
		// on Linux, doesn't support registering two separate event filters (one for read, one for write) on
		// the same file descriptor. We need to implement a workaround for that, but haven't done so yet.
		// That said, we have recently got a new core team member who is very keen to work on the event loop
		// and hopefully we should have something better in the near future.
		//
		// So how do you make it work in the meantime? Given how simple it is, you could replace the two async
		// calls with two threads, disable evented I/O, since you don't really need it much, and there you go.
		// If instead you're more interested in learning how to do async/await in Zig, I had a lot of fun
		// writing a Redis client for Zig, so I highly recommend it.
	}

	// // See https://github.com/ziglang/zig/issues/6515
	// fn getNextArg(arg_it: *std.process.ArgIterator, allocator: std.mem.Allocator) ?anyerror![]u8 {
	// var arg = arg_it.next(allocator);
	// return @as(?anyerror![]u8, arg);
	// }

	/// Copies everything `reader` yields into `writer`, staging the bytes in a
	/// page-sized stack buffer. Any read or write error is returned to the caller.
	fn readAndForward(reader: anytype, writer: anytype) !void {
		// Loris: see https://github.com/ziglang/zig/blob/master/lib/std/fifo.zig
		const Fifo = std.fifo.LinearFifo(u8, .Slice);

		// Backing storage for the FIFO; it is only ever accessed through `pipe`.
		var scratch: [std.mem.page_size]u8 = undefined;
		var pipe = Fifo.init(&scratch);
		defer pipe.deinit();

		// `pump` runs the read/write loop for us.
		return pipe.pump(reader, writer);
	}