Last active
April 20, 2023 18:41
-
-
Save Coreforge/91da3d410ec7eb0ef5bc8dee24b91359 to your computer and use it in GitHub Desktop.
A simple (and not optimal) library to fulfill memory access requirements for BCM2711 PCIe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
//#include <stdlib.h> | |
// Copy n bytes from src to dst and return dst. The regions must not overlap.
//
// Access pattern: at each offset, if at least four bytes remain and both
// dst+offset and src+offset are 4-byte aligned, one 32-bit word is transferred;
// otherwise a single byte is transferred. If dst and src are misaligned by
// different amounts the whole copy is done byte by byte.
//
// Every load and store goes through a volatile lvalue so the compiler can
// neither combine them into wider or unaligned accesses nor recognise the loop
// as a copy idiom and replace it with a call to memcpy (i.e. to this function).
void* memcpy(void* dst, const void* src, size_t n){
    // may_alias lets the word-sized view legally alias objects of any type.
    typedef uint32_t __attribute__((__may_alias__)) word_t;

    volatile unsigned char* out = (volatile unsigned char*)dst;
    const volatile unsigned char* in = (const volatile unsigned char*)src;
    // Integer copies of the addresses, used only for the alignment test.
    const uintptr_t out_addr = (uintptr_t)dst;
    const uintptr_t in_addr = (uintptr_t)src;

    size_t pos = 0;
    while(pos < n){
        // OR-ing the two addresses tests both alignments at once.
        const int both_aligned = (((out_addr + pos) | (in_addr + pos)) % sizeof(word_t)) == 0;
        if(both_aligned && n - pos >= sizeof(word_t)){
            *(volatile word_t*)(out + pos) = *(const volatile word_t*)(in + pos);
            pos += sizeof(word_t);
        } else {
            // Misaligned position, or a tail shorter than one word.
            out[pos] = in[pos];
            pos++;
        }
    }
    return dst;
}
// Copy n bytes from src to dst, where the regions may overlap, and return dst.
//
// The copy is done in place: no temporary buffer is allocated, so the function
// cannot fail and does not depend on the allocator.
//
// Direction: when dst lies inside [src, src+n) the copy runs from the last byte
// towards the first so that source bytes are read before they are overwritten;
// in every other case it runs from the first byte towards the last.
//
// Access pattern: a 32-bit word is transferred whenever at least four bytes
// remain on the side being copied and both addresses are 4-byte aligned;
// otherwise a single byte is transferred. Words are only used when dst and src
// differ by a multiple of four, so a word store never clobbers source bytes
// that have not been read yet. All accesses are volatile so the compiler cannot
// merge them or substitute a library call.
void* memmove(void* dst, const void* src, size_t n){
    // may_alias lets the word-sized view legally alias objects of any type.
    typedef uint32_t __attribute__((__may_alias__)) word_t;

    volatile unsigned char* out = (volatile unsigned char*)dst;
    const volatile unsigned char* in = (const volatile unsigned char*)src;
    // Addresses are compared as integers; relational comparison of pointers
    // into different objects is not defined by the language.
    const uintptr_t out_addr = (uintptr_t)dst;
    const uintptr_t in_addr = (uintptr_t)src;

    if(out_addr < in_addr || out_addr - in_addr >= n){
        // dst starts before src, or the regions are disjoint: ascending copy.
        size_t pos = 0;
        while(pos < n){
            const int both_aligned = (((out_addr + pos) | (in_addr + pos)) % sizeof(word_t)) == 0;
            if(both_aligned && n - pos >= sizeof(word_t)){
                *(volatile word_t*)(out + pos) = *(const volatile word_t*)(in + pos);
                pos += sizeof(word_t);
            } else {
                out[pos] = in[pos];
                pos++;
            }
        }
    } else {
        // dst starts inside the source region: descending copy.
        // pos is the count of bytes still to copy, i.e. one past the next byte.
        size_t pos = n;
        while(pos > 0){
            const int both_aligned = (((out_addr + pos) | (in_addr + pos)) % sizeof(word_t)) == 0;
            if(both_aligned && pos >= sizeof(word_t)){
                pos -= sizeof(word_t);
                *(volatile word_t*)(out + pos) = *(const volatile word_t*)(in + pos);
            } else {
                pos--;
                out[pos] = in[pos];
            }
        }
    }
    return dst;
}
// Fill the first n bytes of the region at s with c converted to unsigned char,
// then return s. Bytes are stored one at a time through a volatile pointer.
void* memset(void* s, int c, size_t n){
    const unsigned char fill = (unsigned char)c;
    volatile unsigned char* cursor = (unsigned char*)s;
    size_t remaining = n;

    while(remaining != 0){
        *cursor = fill;
        cursor++;
        remaining--;
    }
    return s;
}
@Coreforge - That's awesome, thanks! And don't worry about printks/commented code, I just wanted to be able to see it in action, and also be able to pull down and build locally to test on my own 5950 at some point. I hope to do an update video on the graphics cards now that you have one partially working and someone else got the SM750 somewhat working too!
Leaving instructions for use here (for convenience):
wget https://gist.githubusercontent.com/Coreforge/91da3d410ec7eb0ef5bc8dee24b91359/raw/1b72d428b2fe1cba459d5ae7f73663483743ff55/memcpy_unaligned.c
gcc -shared -fPIC -o memcpy.so memcpy_unaligned.c
sudo mv memcpy.so /usr/local/lib/memcpy.so
sudo nano /etc/ld.so.preload
# Put the following line inside ld.so.preload:
/usr/local/lib/memcpy.so
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
gpu-debug-5.10.y has my current code now. I'll transfer the changes to a newer kernel version once I clean them up.