Created
September 18, 2021 12:07
-
-
Save huntiep/506175a97b0e10e586a09fcc68b62239 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Is this even Rust anymore? | |
use std::{fs, io, thread}; | |
use std::arch::x86_64::*; | |
use std::io::Write; | |
const NUM_CPUS: usize = 24; | |
/// A `Copy` wrapper around a raw `*const u8` so that the pointer can be moved
/// into closures that require `Send`/`Sync`.
///
/// The wrapper performs no lifetime or bounds tracking: every method trusts the
/// caller to keep the pointed-to allocation alive and to stay inside it.
#[derive(Copy, Clone)]
struct SendPtr(*const u8);

// SAFETY: these impls are a bare assertion; nothing in the type enforces it.
// The wrapper only ever reads through the pointer, and callers must guarantee
// that the pointee outlives every thread the pointer is handed to.
unsafe impl Send for SendPtr {}
unsafe impl Sync for SendPtr {}

impl SendPtr {
    /// Returns a pointer advanced by `rhs` bytes.
    ///
    /// The caller must ensure the result stays within (or one past the end of)
    /// the allocation the pointer came from.
    fn add(self, rhs: usize) -> Self {
        let SendPtr(base) = self;
        // SAFETY: delegated to the caller, see the method documentation.
        let advanced = unsafe { base.add(rhs) };
        SendPtr(advanced)
    }

    /// Reads the byte the pointer currently refers to.
    ///
    /// The caller must ensure the pointer refers to a live, readable byte.
    fn read(self) -> u8 {
        let SendPtr(raw) = self;
        // SAFETY: delegated to the caller, see the method documentation.
        unsafe { std::ptr::read(raw) }
    }

    /// Hands back the wrapped raw pointer unchanged.
    fn as_ptr(self) -> *const u8 {
        let SendPtr(raw) = self;
        raw
    }
}
fn main() { | |
let grid_t = thread::spawn(|| { fs::read("letters.txt").unwrap() }); | |
let grille_t = thread::spawn(|| { fs::read("mask.txt").unwrap() }); | |
let grid = grid_t.join().unwrap(); | |
let grille = grille_t.join().unwrap(); | |
let mut threads = Vec::new(); | |
let chunk = grid.len() / NUM_CPUS + 1; | |
for i in 0..NUM_CPUS { | |
let mut end = (i+1)*chunk; | |
if end > grid.len() { | |
end = grid.len(); | |
} | |
let len = end - i*chunk; | |
let grid: SendPtr = SendPtr(grid[i*chunk..end].as_ptr()); | |
let grille: SendPtr = SendPtr(grille[i*chunk..end].as_ptr()); | |
threads.push(thread::spawn(move || { | |
let mut out: Vec = Vec::with_capacity(chunk); | |
let mut grille = grille; | |
let mut grid = grid; | |
let mask = unsafe { _mm256_set1_epi8(b' ' as i8) }; | |
for _ in 0..len/32 { | |
unsafe { | |
let dat = _mm256_loadu_si256(grille.as_ptr() as *const __m256i); | |
let eq = _mm256_cmpeq_epi8(mask, dat); | |
let mut pos = _mm256_movemask_epi8(eq) as u32; | |
while pos != 0 { | |
let x = pos.trailing_zeros(); | |
pos = pos ^ (1 << x); | |
out.push(grid.add(x as usize).read()); | |
} | |
} | |
grille = grille.add(32); | |
grid = grid.add(32); | |
} | |
for _ in (len-len%32)..len { | |
if grille.read() == b' ' { | |
out.push(grid.read()); | |
} | |
grille = grille.add(1); | |
grid = grid.add(1); | |
} | |
out | |
})); | |
} | |
for thread in threads { | |
io::stdout().lock().write_all(&(thread.join().unwrap())).unwrap(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Solution to the "grille" problem, i.e. given an x by y grid of characters and an x by y grille
/// of '#'s and ' 's, select only the characters in the grid which match a space in the grille.
/// A very simple problem with an O(n) solution. But just how fast can we go?
use std::arch::x86_64::*; | |
use std::{fs, io, thread}; | |
use std::io::Write; | |
/// Reads `letters.txt` (the grid) and `mask.txt` (the grille), then writes to
/// stdout every grid byte whose grille byte at the same offset is a space.
///
/// If the two files differ in length, only their common prefix is processed.
///
/// # Panics
///
/// Panics if either file cannot be read or if writing to stdout fails.
fn main() {
    // Load both inputs concurrently; each read runs on its own thread.
    let grid_t = thread::spawn(|| fs::read("letters.txt"));
    let grille_t = thread::spawn(|| fs::read("mask.txt"));
    let grid = grid_t
        .join()
        .expect("letters.txt reader thread panicked")
        .expect("failed to read letters.txt");
    let grille = grille_t
        .join()
        .expect("mask.txt reader thread panicked")
        .expect("failed to read mask.txt");

    let out = apply_grille(&grid, &grille);
    io::stdout()
        .lock()
        .write_all(&out)
        .expect("failed to write to stdout");
}

/// Returns the bytes of `grid` whose byte at the same offset in `grille` is a
/// space (`b' '`), in order. Only the common prefix of the two slices is
/// examined, so a short grille can never be read out of bounds.
fn apply_grille(grid: &[u8], grille: &[u8]) -> Vec<u8> {
    let len = grid.len().min(grille.len());
    let (grid, grille) = (&grid[..len], &grille[..len]);
    // At most every byte is selected, so `len` is an exact upper bound and no
    // fixed-size guess is needed.
    let mut out = Vec::with_capacity(len);
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was verified at runtime on the line above.
        unsafe { apply_grille_avx2(grid, grille, &mut out) };
    } else {
        apply_grille_scalar(grid, grille, &mut out);
    }
    out
}

/// Portable byte-at-a-time filter; also handles the tail of the AVX2 path.
fn apply_grille_scalar(grid: &[u8], grille: &[u8], out: &mut Vec<u8>) {
    out.extend(
        grid.iter()
            .zip(grille)
            .filter(|&(_, &hole)| hole == b' ')
            .map(|(&letter, _)| letter),
    );
}

/// AVX2 filter: tests 32 grille bytes per iteration and walks the set bits of
/// the resulting mask to pick the matching grid bytes.
///
/// # Safety
///
/// The executing CPU must support AVX2.
#[target_feature(enable = "avx2")]
unsafe fn apply_grille_avx2(grid: &[u8], grille: &[u8], out: &mut Vec<u8>) {
    let space = _mm256_set1_epi8(b' ' as i8);
    let letter_blocks = grid.chunks_exact(32);
    let hole_blocks = grille.chunks_exact(32);
    let (letter_tail, hole_tail) = (letter_blocks.remainder(), hole_blocks.remainder());

    for (letters, holes) in letter_blocks.zip(hole_blocks) {
        // SAFETY: `chunks_exact(32)` guarantees `holes` is exactly 32 readable
        // bytes, and the unaligned load has no alignment requirement.
        let mut pos = unsafe {
            let dat = _mm256_loadu_si256(holes.as_ptr() as *const __m256i);
            _mm256_movemask_epi8(_mm256_cmpeq_epi8(space, dat)) as u32
        };
        // Bit `x` of `pos` is set exactly when `holes[x]` is a space.
        while pos != 0 {
            let x = pos.trailing_zeros() as usize;
            pos &= pos - 1; // clear the lowest set bit
            out.push(letters[x]);
        }
    }

    apply_grille_scalar(letter_tail, hole_tail, out);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment