Skip to content

Instantly share code, notes, and snippets.

@huntiep
Created September 18, 2021 12:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huntiep/506175a97b0e10e586a09fcc68b62239 to your computer and use it in GitHub Desktop.
Save huntiep/506175a97b0e10e586a09fcc68b62239 to your computer and use it in GitHub Desktop.
/// Is this even Rust anymore?
use std::{fs, io, thread};
use std::arch::x86_64::*;
use std::io::Write;
/// Number of chunks the input is divided into by `main`, one worker thread per chunk.
/// This is a fixed value; it is not detected from the machine at runtime.
const NUM_CPUS: usize = 24;
/// Newtype around a raw `*const u8` so the pointer can be moved into spawned threads.
///
/// Raw pointers are neither `Send` nor `Sync`; the manual impls below opt in.
/// Nothing in this type tracks the lifetime or length of the pointed-to buffer,
/// so every user must keep the buffer alive and stay within its bounds.
#[derive(Clone, Copy)]
struct SendPtr(*const u8);

// SAFETY: this is an unchecked promise. The wrapper only ever reads through the
// pointer, so sharing it is sound as long as the underlying buffer is not
// mutated or freed while any copy of the pointer is still in use.
unsafe impl Sync for SendPtr {}
unsafe impl Send for SendPtr {}

impl SendPtr {
    /// Returns a copy of this pointer advanced by `rhs` bytes.
    ///
    /// The result must stay inside (or one past the end of) the original
    /// allocation; this is not checked.
    fn add(self, rhs: usize) -> Self {
        let SendPtr(base) = self;
        // SAFETY: not verified here; the caller guarantees the offset is in bounds.
        SendPtr(unsafe { base.add(rhs) })
    }

    /// Reads the byte currently pointed at.
    ///
    /// The pointer must be valid for a one-byte read; this is not checked.
    fn read(self) -> u8 {
        // SAFETY: not verified here; the caller guarantees the pointer is readable.
        unsafe { std::ptr::read(self.0) }
    }

    /// Hands back the wrapped raw pointer.
    fn as_ptr(self) -> *const u8 {
        let SendPtr(raw) = self;
        raw
    }
}
fn main() {
let grid_t = thread::spawn(|| { fs::read("letters.txt").unwrap() });
let grille_t = thread::spawn(|| { fs::read("mask.txt").unwrap() });
let grid = grid_t.join().unwrap();
let grille = grille_t.join().unwrap();
let mut threads = Vec::new();
let chunk = grid.len() / NUM_CPUS + 1;
for i in 0..NUM_CPUS {
let mut end = (i+1)*chunk;
if end > grid.len() {
end = grid.len();
}
let len = end - i*chunk;
let grid: SendPtr = SendPtr(grid[i*chunk..end].as_ptr());
let grille: SendPtr = SendPtr(grille[i*chunk..end].as_ptr());
threads.push(thread::spawn(move || {
let mut out: Vec = Vec::with_capacity(chunk);
let mut grille = grille;
let mut grid = grid;
let mask = unsafe { _mm256_set1_epi8(b' ' as i8) };
for _ in 0..len/32 {
unsafe {
let dat = _mm256_loadu_si256(grille.as_ptr() as *const __m256i);
let eq = _mm256_cmpeq_epi8(mask, dat);
let mut pos = _mm256_movemask_epi8(eq) as u32;
while pos != 0 {
let x = pos.trailing_zeros();
pos = pos ^ (1 << x);
out.push(grid.add(x as usize).read());
}
}
grille = grille.add(32);
grid = grid.add(32);
}
for _ in (len-len%32)..len {
if grille.read() == b' ' {
out.push(grid.read());
}
grille = grille.add(1);
grid = grid.add(1);
}
out
}));
}
for thread in threads {
io::stdout().lock().write_all(&(thread.join().unwrap())).unwrap();
}
}
/// Solution to the "grille" problem, i.e. given an x by y grid of characters and an x by y grille
/// of '#'s and ' 's, select only the characters in the grid which match a space in the grille.
/// A very simple problem with an O(n) solution. But just how fast can we go?
use std::arch::x86_64::*;
use std::{fs, io, thread};
use std::io::Write;
/// Reads `letters.txt` (the grid) and `mask.txt` (the grille), keeps every grid
/// byte whose grille byte is a space, and writes the kept bytes to stdout.
///
/// # Panics
///
/// Panics if either input file cannot be read or if writing to stdout fails.
fn main() {
    // Load both files concurrently.
    let grid_t = thread::spawn(|| fs::read("letters.txt").expect("failed to read letters.txt"));
    let grille_t = thread::spawn(|| fs::read("mask.txt").expect("failed to read mask.txt"));
    let grid = grid_t.join().expect("letters.txt reader thread panicked");
    let grille = grille_t.join().expect("mask.txt reader thread panicked");

    let out = apply_grille(&grid, &grille);
    io::stdout()
        .lock()
        .write_all(&out)
        .expect("failed to write to stdout");
}

/// Returns the bytes of `grid` whose counterpart in `grille` is a space.
///
/// Only the common prefix of the two inputs is examined, so a grille shorter
/// than the grid is never read past its end. AVX2 is used when the running CPU
/// supports it; otherwise a scalar loop produces the same result.
fn apply_grille(grid: &[u8], grille: &[u8]) -> Vec<u8> {
    // Every examined byte can be kept at most once, so this capacity is exact
    // in the worst case and never forces a reallocation.
    let mut out = Vec::with_capacity(grid.len().min(grille.len()));
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was verified on the line above.
        unsafe { apply_grille_avx2(grid, grille, &mut out) };
    } else {
        apply_grille_scalar(grid, grille, &mut out);
    }
    out
}

/// AVX2 path: tests 32 grille bytes per iteration and pushes the matching grid bytes.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2.
#[target_feature(enable = "avx2")]
unsafe fn apply_grille_avx2(grid: &[u8], grille: &[u8], out: &mut Vec<u8>) {
    let common = grid.len().min(grille.len());
    let mut letter_blocks = grid[..common].chunks_exact(32);
    let mut mask_blocks = grille[..common].chunks_exact(32);
    let space = _mm256_set1_epi8(b' ' as i8);

    for (letters, mask) in letter_blocks.by_ref().zip(mask_blocks.by_ref()) {
        // SAFETY: `chunks_exact(32)` guarantees `mask` is exactly 32 readable
        // bytes, and the unaligned load has no alignment requirement.
        let lanes = unsafe { _mm256_loadu_si256(mask.as_ptr() as *const __m256i) };
        // One bit per byte lane; the cast only reinterprets the 32-bit pattern.
        let mut bits = _mm256_movemask_epi8(_mm256_cmpeq_epi8(space, lanes)) as u32;
        while bits != 0 {
            out.push(letters[bits.trailing_zeros() as usize]);
            bits &= bits - 1; // clear the lowest set bit
        }
    }

    // Fewer than 32 bytes are left over; finish them one at a time.
    apply_grille_scalar(letter_blocks.remainder(), mask_blocks.remainder(), out);
}

/// Scalar path: appends to `out` each byte of `grid` whose `grille` byte is a space.
fn apply_grille_scalar(grid: &[u8], grille: &[u8], out: &mut Vec<u8>) {
    out.extend(
        grid.iter()
            .zip(grille)
            .filter(|&(_, &hole)| hole == b' ')
            .map(|(&letter, _)| letter),
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment