Skip to content

Instantly share code, notes, and snippets.

@Coreforge
Last active April 20, 2023 18:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Coreforge/91da3d410ec7eb0ef5bc8dee24b91359 to your computer and use it in GitHub Desktop.
Save Coreforge/91da3d410ec7eb0ef5bc8dee24b91359 to your computer and use it in GitHub Desktop.
A simple (and not optimal) library to fulfill memory access requirements for BCM2711 PCIe
#include <stddef.h>
#include <stdio.h>
#include <stdint.h>
//#include <stdlib.h>
/*
 * memcpy replacement that only performs naturally aligned accesses.
 *
 * Copies n bytes from src to dst (the regions must not overlap). Whenever the
 * current source and destination addresses are both 4-byte aligned and at
 * least 4 bytes remain, one 32-bit word is copied; otherwise a single byte is
 * copied. If src and dst have different offsets modulo 4 the whole copy is
 * therefore done byte by byte.
 *
 * Every access goes through a volatile lvalue so the compiler has to emit
 * exactly the accesses written here: it may not split or merge them, and it
 * may not turn the loop back into a call to memcpy (which would recurse into
 * this function).
 *
 * Returns dst.
 */
void* memcpy(void* dst, const void* src, size_t n){
/* may_alias (GCC/Clang): the word access may touch objects of any type */
typedef uint32_t __attribute__((may_alias)) word32_alias;
volatile unsigned char* out = (volatile unsigned char*)dst;
const volatile unsigned char* in = (const volatile unsigned char*)src;
size_t pos = 0;
while(pos < n){
int both_aligned = (uintptr_t)(out + pos) % 4 == 0 && (uintptr_t)(in + pos) % 4 == 0;
if(both_aligned && n - pos >= 4){
// both addresses are aligned and a full word is left
*(volatile word32_alias*)(out + pos) = *(const volatile word32_alias*)(in + pos);
pos += 4;
} else {
// unaligned head, mismatched alignment, or tail shorter than a word
out[pos] = in[pos];
pos++;
}
}
return dst;
}
/*
 * memmove replacement that copies one byte at a time, so no access can be
 * misaligned, and that handles overlapping regions without a temporary buffer.
 *
 * If dst lies above src the copy runs from the last byte down to the first,
 * otherwise from the first byte up; in both cases every source byte is read
 * before an overlapping write can clobber it. No heap allocation is done, so
 * the function cannot fail and does not depend on the allocator.
 *
 * The accesses are volatile so the compiler emits them as written and cannot
 * replace the loops with a call back into memmove/memcpy.
 *
 * Returns dst.
 */
void* memmove(void* dst, const void* src, size_t n){
volatile unsigned char* out = (volatile unsigned char*)dst;
const volatile unsigned char* in = (const volatile unsigned char*)src;
// compare as integers: relational comparison of unrelated pointers is undefined
if((uintptr_t)dst > (uintptr_t)src){
// destination is above the source: start at the last byte
for(size_t pos = n; pos > 0; pos--){
out[pos - 1] = in[pos - 1];
}
} else {
// destination is at or below the source: start at the first byte
for(size_t pos = 0; pos < n; pos++){
out[pos] = in[pos];
}
}
return dst;
}
/*
 * memset replacement: stores the value c, converted to unsigned char, into
 * each of the first n bytes at s, one byte per store, in ascending address
 * order. The stores go through a volatile pointer. Returns s.
 */
void* memset(void* s, int c, size_t n){
const unsigned char fill = (unsigned char)c;
volatile unsigned char* cursor = (unsigned char*)s;
size_t remaining = n;
while(remaining != 0){
*cursor = fill; // single-byte store
cursor++;
remaining--;
}
return s;
}
@geerlingguy
Copy link

Do you still have a branch in your linux src repo where you maintain the changes required to use this library?

@Coreforge
Copy link
Author

I can push my current kernel to the gpu branch on there. It's quite messy with a lot of commented out code and a bunch of printks, but I can push that.

@Coreforge
Copy link
Author

gpu-debug-5.10.y has my current code now. I'll transfer the changes to a newer kernel version once I clean them up.

@geerlingguy
Copy link

@Coreforge - That's awesome, thanks! And don't worry about printks/commented code, I just wanted to be able to see it in action, and also be able to pull down and build locally to test on my own 5950 at some point. I hope to do an update video on the graphics cards now that you have one partially working and someone else got the SM750 somewhat working too!

@geerlingguy
Copy link

geerlingguy commented Apr 15, 2022

Leaving instructions for use here (for convenience):

wget https://gist.githubusercontent.com/Coreforge/91da3d410ec7eb0ef5bc8dee24b91359/raw/1b72d428b2fe1cba459d5ae7f73663483743ff55/memcpy_unaligned.c

gcc -shared -fPIC -o memcpy.so memcpy_unaligned.c
sudo mv memcpy.so /usr/local/lib/memcpy.so
sudo nano /etc/ld.so.preload

# Put the following line inside ld.so.preload:
/usr/local/lib/memcpy.so

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment