Last active
July 19, 2024 15:58
-
-
Save Rexicon226/80048f4ccd68d0e5fce927bbbad44226 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
/// C-ABI `memcpy` entry point: copies `len` bytes from `maybe_src` to
/// `maybe_dest` and returns the destination pointer.
///
/// When `len` is zero the pointers are never unwrapped and `maybe_dest` is
/// returned as given. Otherwise both pointers must be non-null (they are
/// unwrapped with `.?`).
///
/// The copy strategy is chosen purely by size:
///   * 1         -> single byte store
///   * 2..3      -> two overlapping 2-byte blocks
///   * 4..7      -> two overlapping 4-byte blocks
///   * 8..16     -> two overlapping 8-byte blocks
///   * 17..32    -> two overlapping 16-byte blocks
///   * 33..256   -> `copyLong`
///   * > 256     -> `copyMove` (direction-aware bulk copy)
export fn memcpy(maybe_dest: ?[*]u8, maybe_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (@expect(len == 0, false)) return maybe_dest;

    const dst = maybe_dest.?;
    const source = maybe_src.?;

    if (@expect(len < 8, false)) {
        // Tiny copies: 1 byte, or a pair of overlapping 2/4-byte blocks.
        if (@expect(len == 1, false)) {
            dst[0] = source[0];
        } else if (@expect(len >= 4, false)) {
            blockCopy(dst, source, 4, len);
        } else {
            blockCopy(dst, source, 2, len);
        }
    } else if (@expect(len > 32, false)) {
        // Anything larger than one 32-byte vector pair.
        if (@expect(len > 256, false)) {
            copyMove(dst, source, len);
        } else {
            copyLong(dst, source, len);
        }
    } else if (@expect(len > 16, false)) {
        blockCopy(dst, source, 16, len);
    } else {
        blockCopy(dst, source, 8, len);
    }

    return dst;
}
/// Copies `len` bytes using two `block_size`-wide moves: one anchored at the
/// start of the range and one anchored at its end. The two blocks may overlap
/// in the middle; callers in this file pass `block_size <= len <= 2 * block_size`
/// so that together they cover every byte.
///
/// Both blocks are loaded before either is stored.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
    const Block = @Vector(block_size, u8);
    const end_offset = len - block_size;

    const head: Block = src[0..block_size].*;
    const tail: Block = src[end_offset..][0..block_size].*;

    dest[0..block_size].* = head;
    dest[end_offset..][0..block_size].* = tail;
}
/// Copies `len` bytes for medium sizes; callers in this file pass
/// `32 < len <= 256`.
///
/// Works outside-in: step `k` loads the `k`-th 32-byte vector counted from the
/// start and the `k`-th counted from the end. As soon as the loaded vectors
/// span the whole range (`len <= 64 * (k + 1)`), everything loaded so far is
/// written out and the function returns. Every load therefore happens before
/// the first store.
///
/// If `len` exceeds 256 the loop finishes without writing anything.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);
    var from_start: [4]Chunk = undefined;
    var from_end: [4]Chunk = undefined;

    inline for (0..4) |step| {
        const near = step * 32; // offset of this step's chunk from the start
        const far = near + 32; // distance of this step's chunk from the end
        from_start[step] = src[near..][0..32].*;
        from_end[step] = src[len - far ..][0..32].*;

        if (@expect(len <= (step + 1) * 64, false)) {
            for (0..step + 1) |k| {
                dest[k * 32 ..][0..32].* = from_start[k];
                dest[len - (k * 32 + 32) ..][0..32].* = from_end[k];
            }
            return;
        }
    }
}
/// Picks the copy direction for large copies based on how the two ranges are
/// laid out in memory.
///
/// A backward copy (`overlapBwd`) is used only when `dest` starts strictly
/// inside the source range, i.e. `src < dest < src + len`. In every other
/// layout (`src` at or above `dest`, or the ranges disjoint) the forward
/// copy is used.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    const dest_addr = @intFromPtr(dest);
    const src_addr = @intFromPtr(src);

    if (@expect(src_addr >= dest_addr, false)) {
        return copyForward(dest, src, len);
    }
    // From here on src is below dest; check whether the source range reaches into it.
    if (@expect(src_addr + len > dest_addr, false)) {
        return overlapBwd(dest, src, len);
    }
    copyForward(dest, src, len);
}
/// Forward (low-to-high address) copy of `len` bytes; reads the last 32 source
/// bytes unconditionally, so `len` must be at least 32. `copyMove` calls this
/// both for disjoint ranges and for overlapping ranges where `src >= dest`.
///
/// The bulk of the data is moved in 128-byte steps. The remainder is either
/// covered by the last 32 source bytes (loaded up front, before any store can
/// clobber them) or handed to `copyLong` when more than 32 bytes are left.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    // Saved before any store so an overlapping destination cannot corrupt it.
    const tail: Chunk = src[len - 32 ..][0..32].*;

    // Largest multiple of 128 not exceeding `len`.
    const bulk_len: usize = len & ~@as(usize, 127);
    var i: usize = 0;
    while (i < bulk_len) : (i += 128) {
        // Load all four vectors into values before storing any of them.
        // A direct array-to-array assignment (`dest[..].* = src[..].*`) may be
        // lowered as a memcpy-style aggregate copy, which is not defined for
        // partially overlapping regions and could even call back into memcpy.
        // Explicit vector temporaries keep this a plain load/store sequence
        // that stays correct when `src` overlaps `dest` from above.
        const v0: Chunk = src[i..][0..32].*;
        const v1: Chunk = src[i + 32 ..][0..32].*;
        const v2: Chunk = src[i + 64 ..][0..32].*;
        const v3: Chunk = src[i + 96 ..][0..32].*;
        dest[i..][0..32].* = v0;
        dest[i + 32 ..][0..32].* = v1;
        dest[i + 64 ..][0..32].* = v2;
        dest[i + 96 ..][0..32].* = v3;
    }

    if (@expect(len - i <= 32, true)) {
        // At most 32 bytes remain: the saved tail covers them (re-writing a
        // few already-copied bytes with identical values).
        dest[len - 32 ..][0..32].* = tail;
    } else {
        // 33..127 bytes remain; copyLong loads everything before storing.
        copyLong(dest[i..], src[i..], len - i);
    }
}
/// Backward (high-to-low address) copy of `len` bytes. `copyMove` calls this
/// only when `dest` starts inside the source range (`src < dest < src + len`).
/// Reads the first 128 and last 32 source bytes unconditionally.
///
/// The first 128 and the last 32 source bytes are loaded up front and written
/// last, so the main loop is free to overwrite them. The loop walks from the
/// end towards the start in 128-byte steps, loading four vectors before
/// storing them, and stops once at most 128 bytes remain at the front.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    const saved_tail: Chunk = src[len - 32 ..][0..32].*;
    var saved_head: [4]Chunk = undefined;
    inline for (0..4) |k| saved_head[k] = src[k * 32 ..][0..32].*;

    // Trim the end so that `dest + remaining` is a multiple of 32; the
    // trimmed bytes are covered by `saved_tail`.
    const misalign: usize = (@intFromPtr(dest) + len - 32) & 31;
    var remaining: usize = len - misalign;

    while (remaining > 128) : (remaining -= 128) {
        // Unaligned loads of the 128 bytes just below `remaining`.
        const v0: Chunk = src[remaining - 32 ..][0..32].*;
        const v1: Chunk = src[remaining - 64 ..][0..32].*;
        const v2: Chunk = src[remaining - 96 ..][0..32].*;
        const v3: Chunk = src[remaining - 128 ..][0..32].*;

        // Stores go to 32-byte-aligned destination addresses.
        @as(*align(32) Chunk, @alignCast(@ptrCast(dest + (remaining - 32)))).* = v0;
        @as(*align(32) Chunk, @alignCast(@ptrCast(dest + (remaining - 64)))).* = v1;
        @as(*align(32) Chunk, @alignCast(@ptrCast(dest + (remaining - 96)))).* = v2;
        @as(*align(32) Chunk, @alignCast(@ptrCast(dest + (remaining - 128)))).* = v3;
    }

    // Whatever the loop left at the front fits inside the saved first 128 bytes.
    inline for (0..4) |k| dest[k * 32 ..][0..32].* = saved_head[k];
    dest[len - 32 ..][0..32].* = saved_tail;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment