-
-
Save cfsamson/4b10bd4e1e828f71405418513cc5b880 to your computer and use it in GitHub Desktop.
#![feature(asm)] | |
/// Size in bytes of the hand-built stack.
///
/// Deliberately tiny (only 48 bytes) so the whole stack can be printed and inspected before we
/// switch contexts.
///
/// ===== NOTICE FOR OSX USERS =====
/// You'll need to increase this size to at least 624 bytes. The 48-byte stack works in the Rust
/// Playground and on Windows, but such an extremely small stack seems to have an issue on OSX.
const SSIZE: isize = 48;
/// Saved CPU context: the registers described in the x86-64 ABI that we need in order to save
/// and restore a context.
///
/// This must be `#[repr(C)]` because the assembly reads the data by fixed offset. Rust has no
/// stable ABI, so without it there is no guarantee that `rsp` occupies the first 8 bytes of the
/// struct. C has a stable layout we can rely on.
#[derive(Debug, Default)]
#[repr(C)]
struct ThreadContext {
    // Stack pointer; read by the assembly at offset 0x00, so it must stay the first field.
    rsp: u64,
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    rbx: u64,
    rbp: u64,
}
/// Function whose address is planted on the hand-built stack.
///
/// Prints a greeting and then spins forever. It never returns (`-> !`): it is entered through a
/// `ret` rather than a `call`, so no return address for it was ever pushed.
fn hello() -> ! {
    println!("I LOVE WAKING UP ON A NEW STACK!");
    loop {}
}
// We use a trick here. We push the address to our own stack to the rsp register. The ret keyword transfers program control | |
// to the return address located on top of the stack. Since we pushed our address there it returns directly into our | |
// function. | |
unsafe fn gt_switch(new: *const ThreadContext) { | |
asm!(" | |
mov 0x00($0), %rsp | |
ret | |
" | |
: | |
: "r"(new) | |
: | |
: "alignstack" // it will work without this now, but we need it for it to work on windows later | |
); | |
} | |
fn main() { | |
let mut ctx = ThreadContext::default(); | |
// This will be our stack. Note that it's very important that we don't `push` to this array since it can trigger an | |
// expansion that will relocate all the data and our pointers will no longer be valid | |
let mut stack = vec![0_u8; SSIZE as usize]; | |
unsafe { | |
// this returns the pointer to the memory for our Vec, we offset it so | |
// we get the "high" address which will be the bottom of our stack. | |
let stack_bottom = stack.as_mut_ptr().offset(SSIZE); | |
// make sure our stack itself is 16 byte aligned - it will always | |
// offset to a lower memory address. Since we know we're at the "high" | |
// memory address of our allocated space, we know that offsetting to | |
// a lower one will be a valid address (given that we actually allocated) | |
// enough space to actually get an aligned pointer in the first place). | |
let sb_aligned = (stack_bottom as usize & ! 15) as *mut u8; | |
// So this is actually designing our stack. `hello` is a pointer already (a function pointer) so we can cast it | |
// directly as an u64 since all pointers ono 64 bits systems will be, well, 64 bit ;) | |
// | |
// Then we write this pointer to our stack. Make note that we cast the pointer to to the offset of 16 bytes | |
// (remember what I wrote about 16 byte alignment?). And that we cast it as a pointer to an u64 instead of an u8 | |
// We want to write to position 32, 33, 34, 35, 36, 37, 38, 39, 40 which is the 8 byte space we need to store our | |
// u64. | |
std::ptr::write(sb_aligned.offset(-16) as *mut u64, hello as u64); | |
// We set the "rsp" (Stack Pointer) to *point to* the first byte of our address, we don't pass the value of the | |
// u64, but an address to the first byte. | |
ctx.rsp = sb_aligned.offset(-16) as u64; | |
// we switch over to our new stack | |
gt_switch(&mut ctx); | |
} | |
} |
#![feature(asm)] | |
#![feature(naked_functions)] | |
use std::io::Write; | |
/// Size in bytes of the hand-built stack.
const SSIZE: isize = 1024;

/// Base (lowest) address of the hand-built stack, read by `print_stack`.
///
/// Starts out null and must be set before `print_stack` is called.
static mut S_PTR: *const u8 = std::ptr::null();
/// Saved CPU context for one thread of execution.
///
/// `#[repr(C)]` fixes the field order in memory; the context-switch assembly reads the stack
/// pointer at offset `0x00`, so `rsp` must stay the first field.
#[derive(Debug, Default)]
#[repr(C)]
struct ThreadContext {
    // Stack pointer; read by the assembly at offset 0x00.
    rsp: u64,
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    rbx: u64,
    rbp: u64,
}
fn print_stack(filename: &str) { | |
let mut f = std::fs::File::create(filename).unwrap(); | |
unsafe { | |
for i in (0..SSIZE).rev() { | |
writeln!( | |
f, | |
"mem: {}, val: {}", | |
S_PTR.offset(i as isize) as usize, | |
*S_PTR.offset(i as isize) | |
) | |
.expect("Error writing to file."); | |
} | |
} | |
} | |
fn hello() { | |
println!("I LOVE WAKING UP ON A NEW STACK!"); | |
print_stack("AFTER.txt"); | |
loop {} | |
} | |
unsafe fn gt_switch(new: *const ThreadContext) { | |
asm!(" | |
mov 0x00($0), %rsp | |
ret | |
" | |
: | |
: "r"(new) | |
: | |
: "alignstack" | |
); | |
} | |
fn main() { | |
let mut ctx = ThreadContext::default(); | |
let mut stack = vec![0_u8; SSIZE as usize]; | |
let stack_ptr = stack.as_mut_ptr(); | |
unsafe { | |
S_PTR = stack_ptr; | |
std::ptr::write(stack_ptr.offset(SSIZE - 16) as *mut u64, hello as u64); | |
print_stack("BEFORE.txt"); | |
ctx.rsp = stack_ptr.offset(SSIZE - 16) as u64; | |
gt_switch(&mut ctx); | |
} | |
} |
@cfsamson Sorry I couldn't reply earlier.
At the time I was using VS Code.
At first I thought this was an issue with how tolerant asm!() is of whitespace,
but now I suspect it might be something OS-related.
Because since I submitted my first post, I've tried running it with different combinations of whitespaces and it would run successfully at random. Then I found out that the same compiled program won't work for a couple of runs, but then suddenly gives the correct output:
PS C:\Users\Sarkom\green_threads> cargo run
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Running `target\debug\green_threads.exe`
error: process didn't exit successfully: `target\debug\green_threads.exe` (exit code: 0xc0000374, STATUS_HEAP_CORRUPTION)
PS C:\Users\Sarkom\green_threads> cargo run
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Running `target\debug\green_threads.exe`
error: process didn't exit successfully: `target\debug\green_threads.exe` (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION)
PS C:\Users\Sarkom\green_threads> cargo run
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Running `target\debug\green_threads.exe` <<<<<<<<<<<<<<<<<<<< here I Ctrl+C out of the program
PS C:\Users\Sarkom\green_threads> cargo run
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Running `target\debug\green_threads.exe`
I LOVE WAKING UP ON A NEW STACK
¯\_(ツ)_/¯
Nothing major, but something to be aware of I guess. The behaviour is the same with cargo run --release.
@clemarescx OK, I see. Yes, you might be right. I don't think the whitespace is the problem. The two main suspects are the extremely small stack (on macOS it won't run unless the stack is more than 624 bytes) and/or that the nt_tib
data is not stored (see the Supporting Windows Appendix for more detail). If you increase the stack size to e.g. 1024, I guess that would fix this most of the time. Strangely, it runs fine on several consecutive runs on my Win 10 box, but I know that this could be an issue.
Oddly enough, a stack size of exactly 624 bytes yields Segmentation fault: 11
on OSX. A stack size of 623 bytes runs properly... at least for me. All this is in debug mode.
@clemarescx Thanks for reporting. I just re-tried this on my Windows PC and it works fine there when simply copy-pasting it over, so I think something must be happening in the copy/paste process. It might not be encoded as valid whitespace or a valid tab when pasting it. I tried both a tab and multiple spaces, and both work fine for me.
What editor are you using? I'm not sure there is really much I can do except edit it to one space and see if that mitigates this potential problem :)