Skip to content

Instantly share code, notes, and snippets.

@TroyKomodo
Created January 12, 2024 22:40
Show Gist options
  • Save TroyKomodo/1255e2ebbc6007cf3a5161fe861a3e3d to your computer and use it in GitHub Desktop.
Save TroyKomodo/1255e2ebbc6007cf3a5161fe861a3e3d to your computer and use it in GitHub Desktop.

AMD Alveo MA35 Ubuntu 23.x driver patch

Steps

  1. These steps assume you have followed the guide here and have reached the "Install the AMD AMA Video SDK" step.

Start by installing the prerequisites and purging any previously installed MA35 / AMD AMA packages:

sudo apt -y install dkms libhugetlbfs0 libboost-all-dev
dpkg --get-selections '*ma35*' | awk '{system("sudo apt -y purge " $1)}'
dpkg --get-selections 'amd-ama*' | awk '{system("sudo apt -y purge " $1)}'
sudo apt update
sudo apt install amd-ama-core

Note that we only install amd-ama-core here; the kernel driver itself is built from source in the following steps.

  1. Download the driver from the README or here.
wget https://www.xilinx.com/content/dam/xilinx/guest-resources/2023/video/ma35d_sdk_v1.0_linux_kernel_driver.zip -O /tmp/ma35d_sdk_v1.0_linux_kernel_driver.zip
  1. Unzip the download
sudo mkdir /usr/src/amd-ama-kmod-1.0.0
sudo unzip /tmp/ma35d_sdk_v1.0_linux_kernel_driver.zip -d /usr/src/amd-ama-kmod-1.0.0
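  1. Apply the git diff (the file ma35d_sdk_v1.0_linux_kernel_driver_linux_6_5.diff from this gist must first be saved into /usr/src/amd-ama-kmod-1.0.0)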
cd /usr/src/amd-ama-kmod-1.0.0
sudo git apply ma35d_sdk_v1.0_linux_kernel_driver_linux_6_5.diff
  1. Build & Install the module
sudo dkms build amd-ama-kmod/1.0.0 --force
sudo dkms install amd-ama-kmod/1.0.0 --force
  1. Copy over the firmware
wget https://packages.xilinx.com/artifactory/debian-packages/pool/amd-ama-driver_1.0.0-amd64.deb -O /tmp/amd-ama-driver_1.0.0-amd64.deb
mkdir /tmp/amd-ama-driver_1.0.0-amd64
dpkg-deb -x /tmp/amd-ama-driver_1.0.0-amd64.deb /tmp/amd-ama-driver_1.0.0-amd64
sudo cp /tmp/amd-ama-driver_1.0.0-amd64/lib/firmware/* /lib/firmware -r
  1. Done

You should now reboot your machine and check that the driver loads correctly.

Flashing (optional)

You may need to flash the firmware. On my machine, the maflash program provided in amd-ama-core segfaults, so I compiled it from source and was then able to flash.

  1. Download the source file from here
wget https://www.xilinx.com/content/dam/xilinx/guest-resources/2023/video/ma35d_sdk_v1.0_ddbi.zip -O /tmp/ma35d_sdk_v1.0_ddbi.zip
  1. Extract the source
mkdir /tmp/ma35d_sdk_v1.0_ddbi
unzip /tmp/ma35d_sdk_v1.0_ddbi.zip -d /tmp/ma35d_sdk_v1.0_ddbi
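  1. Apply the patch (the file ma35d_sdk_v1.0_ddbi.diff from this gist must first be saved into /tmp/ma35d_sdk_v1.0_ddbi/ma35_ddbi-libs)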
cd /tmp/ma35d_sdk_v1.0_ddbi/ma35_ddbi-libs
git apply ma35d_sdk_v1.0_ddbi.diff
  1. Install Libs
sudo apt install libzip-dev nlohmann-json3-dev libpci-dev
  1. Build the flash tool
g++ -o maflash app/maflash/app/main.cpp app/maflash/lib/FlashDevice.cpp app/maflash/lib/MaFlash.cpp -Iapp/maflash/lib/include -std=c++20 -lzip -lpci -lssl -lcrypto
  1. Flash the Board

You can now continue to follow the steps here using the newly compiled binary.

If you want the remaining steps to be identical, you can move this binary to the location of the old one:

sudo mv maflash /opt/amd/ama/ma35/bin/maflash
diff --git a/app/maflash/lib/MaFlash.cpp b/app/maflash/lib/MaFlash.cpp
index c1e0191..2d8d057 100644
--- a/app/maflash/lib/MaFlash.cpp
+++ b/app/maflash/lib/MaFlash.cpp
@@ -21,7 +21,7 @@
#include <algorithm>
#include <future>
-#include <Logging.h>
+// #include <Logging.h>
#include <zip.h>
@@ -135,7 +135,7 @@ auto MaFlash::GetFile(const std::string& File) -> file_t {
if (result == NO_MORE) {
return FileResult(NO_MORE);
}
- logDebug("ZIP %u %s %p", result, name.c_str(), zipfile);
+ std::cout << "ZIP " << result << " " << name.c_str() << " " << zipfile;
std::string contents;
contents.reserve(size);
contents.resize(size);
@@ -185,7 +185,7 @@ MaFlash::result_t MaFlash::DoInfo(const std::string& File) {
if (name.empty()) {
return OK;
}
- logDebug("%s [%lu]", name.c_str(), contents.size());
+ std::cout << name << " [" << contents.size() << "]";
std::cout << name << ": ";
outputMeta(json);
std::cout << '\n';
@@ -399,7 +399,6 @@ void MaFlash::DumpDeviceList() {
}
auto MaFlash::Run() -> result_t {
- logSetEnv();
bool backup = false;
bool stopOnError = false;
bool parallel = false;
diff --git a/build_driver.sh b/build_driver.sh
old mode 100644
new mode 100755
diff --git a/dkms.conf b/dkms.conf
index c1ddda4..c546db8 100644
--- a/dkms.conf
+++ b/dkms.conf
@@ -4,7 +4,7 @@
#
PACKAGE_NAME=amd-ama-kmod
-PACKAGE_VERSION=0.1.3
+PACKAGE_VERSION=1.0.0
DEST_MODULE_LOCATION[0]="/kernel/drivers/misc"
@@ -15,4 +15,4 @@ BUILT_MODULE_NAME="ama_transcoder"
BUILT_MODULE_LOCATION="."
REMAKE_INITRD=no
AUTOINSTALL=yes
-BUILD_EXCLUSIVE_KERNEL="^5\.1[5-7]\..*"
+BUILD_EXCLUSIVE_KERNEL="^6\.[567]\..*"
diff --git a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_dmabuf.c b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_dmabuf.c
index 238b07a..3cebcf8 100644
--- a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_dmabuf.c
+++ b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_dmabuf.c
@@ -63,6 +63,7 @@
#include <linux/mutex.h>
#include <linux/atomic.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-resv.h>
#include <linux/dma-buf.h>
#include <linux/platform_device.h>
@@ -197,7 +198,7 @@ dmabuf_info_show(struct device *dev, struct device_attribute *attr, char *buf)
list_for_each_entry(buf_desc, &priv->buf_list, list) {
struct dma_buf *buf_obj = buf_desc->dmabuf;
- ret = mutex_lock_interruptible(&buf_obj->lock);
+ ret = dma_resv_lock_interruptible(buf_obj->resv, NULL);
if (ret) {
len += sprintf(buf + len, "ERROR locking buffer object: skipping\n");
@@ -218,7 +219,7 @@ dmabuf_info_show(struct device *dev, struct device_attribute *attr, char *buf)
size += buf_obj->size;
npages += buf_desc->npages;
- mutex_unlock(&buf_obj->lock);
+ dma_resv_unlock(buf_obj->resv);
}
len += sprintf(buf + len, "\nTotal %d objects, %d pages, %zu bytes\n", count, npages, size);
diff --git a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_gfp.c b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_gfp.c
index 5e37b03..2654521 100644
--- a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_gfp.c
+++ b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_gfp.c
@@ -404,7 +404,7 @@ _GFPAlloc(gckALLOCATOR Allocator, PLINUX_MDL Mdl, gctSIZE_T NumPages, gctUINT32
gceSTATUS status;
gctSIZE_T i = 0;
gctBOOL contiguous = Flags & gcvALLOC_FLAG_CONTIGUOUS;
- u32 normal_gfp = __GFP_HIGH | __GFP_ATOMIC | __GFP_NORETRY | gcdNOWARN;
+ u32 normal_gfp = __GFP_HIGH | GFP_ATOMIC | __GFP_NORETRY | gcdNOWARN;
u32 gfp = (contiguous ? normal_gfp : GFP_KERNEL) | __GFP_HIGHMEM | gcdNOWARN;
struct gfp_alloc *priv = (struct gfp_alloc *)Allocator->privateData;
@@ -730,7 +730,7 @@ _GFPMmap(gckALLOCATOR Allocator, PLINUX_MDL Mdl, gctBOOL Cacheable,
gcmkHEADER_ARG("Allocator=%p Mdl=%p vma=%p", Allocator, Mdl, vma);
- vma->vm_flags |= gcdVM_FLAGS;
+ vm_flags_set(vma, gcdVM_FLAGS);
if (Cacheable == gcvFALSE) {
/* Make this mapping non-cached. */
diff --git a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_reserved_mem.c b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_reserved_mem.c
index 96ecb41..0fc9461 100644
--- a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_reserved_mem.c
+++ b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_reserved_mem.c
@@ -273,7 +273,7 @@ reserved_mem_mmap(gckALLOCATOR Allocator, PLINUX_MDL Mdl, gctBOOL Cacheable,
pfn = (res->start >> PAGE_SHIFT) + skipPages;
/* Make this mapping non-cached. */
- vma->vm_flags |= gcdVM_FLAGS;
+ vm_flags_set(vma, gcdVM_FLAGS);
#if gcdENABLE_BUFFERABLE_VIDEO_MEMORY
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
diff --git a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_user_memory.c b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_user_memory.c
index bbf00a1..633b9a5 100644
--- a/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_user_memory.c
+++ b/hal/os/linux/kernel/allocator/default/gc_hal_kernel_allocator_user_memory.c
@@ -60,6 +60,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/cache.h>
+#include <linux/mm.h>
#define _GC_OBJ_ZONE gcvZONE_ALLOCATOR
@@ -197,19 +198,7 @@ import_page_map(gckOS Os, struct device *dev, struct um_desc *um,
down_read(&current_mm_mmap_sem);
-#if LINUX_VERSION_CODE > KERNEL_VERSION(5, 6, 0)
- result = pin_user_pages(addr & PAGE_MASK, page_count,
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
- result = get_user_pages(current, current->mm, addr & PAGE_MASK, page_count,
-#else
- result = get_user_pages(addr & PAGE_MASK, page_count,
-#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) || defined(CONFIG_PPC)
- (flags & VM_WRITE) ? FOLL_WRITE : 0,
-#else
- (flags & VM_WRITE) ? 1 : 0, 0,
-#endif
- pages, NULL);
+ result = pin_user_pages(addr & PAGE_MASK, page_count, (flags & VM_WRITE) ? FOLL_WRITE : 0, pages);
up_read(&current_mm_mmap_sem);
diff --git a/hal/os/linux/kernel/gc_hal_kernel_device.c b/hal/os/linux/kernel/gc_hal_kernel_device.c
index 28090ff..d64c7c3 100644
--- a/hal/os/linux/kernel/gc_hal_kernel_device.c
+++ b/hal/os/linux/kernel/gc_hal_kernel_device.c
@@ -387,10 +387,10 @@ gc_mmuinfo_show(void *data)
gcmkPRINT(" STLB entry : \n");
stlbEntryNum = stlbSize >> 2;
for (sStart = 0; sStart < stlbEntryNum; ++sStart) {
- if (!(stlbLogical + sStart)) {
- gcmkPRINT(" stlb address is NULL...\n");
- continue;
- }
+ // if (!(stlbLogical + sStart)) {
+ // gcmkPRINT(" stlb address is NULL...\n");
+ // continue;
+ // }
stlbEntry = _ReadPageEntry(stlbLogical + sStart);
if (stlbEntry && stlbEntry & ~0XF) {
diff --git a/hal/os/linux/kernel/gc_hal_kernel_linux.h b/hal/os/linux/kernel/gc_hal_kernel_linux.h
index 7891435..b1c9007 100644
--- a/hal/os/linux/kernel/gc_hal_kernel_linux.h
+++ b/hal/os/linux/kernel/gc_hal_kernel_linux.h
@@ -269,7 +269,7 @@ gceSTATUS
_ConvertLogical2Physical(gckOS Os, gctPOINTER Logical, gctUINT32 ProcessID,
PLINUX_MDL Mdl, gctPHYS_ADDR_T *Physical);
-gctBOOL
+gceSTATUS
_QuerySignal(gckOS Os, gctSIGNAL Signal);
static inline gctINT
diff --git a/hal/os/linux/kernel/gc_hal_kernel_os.c b/hal/os/linux/kernel/gc_hal_kernel_os.c
index 08f6a82..33130ca 100644
--- a/hal/os/linux/kernel/gc_hal_kernel_os.c
+++ b/hal/os/linux/kernel/gc_hal_kernel_os.c
@@ -69,6 +69,7 @@
#include <linux/platform_device.h>
# include <linux/anon_inodes.h>
#include <linux/cpufreq.h>
+#include "internal.h"
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
# include <linux/io.h>
diff --git a/hal/os/linux/kernel/internal.h b/hal/os/linux/kernel/internal.h
new file mode 100644
index 0000000..7ef4628
--- /dev/null
+++ b/hal/os/linux/kernel/internal.h
@@ -0,0 +1,79 @@
+#pragma once
+
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mm/pgtable-generic.c
+ *
+ * Generic pgtable methods declared in linux/pgtable.h
+ *
+ * Copyright (C) 2010 Linus Torvalds
+ */
+
+#include <linux/pagemap.h>
+#include <linux/hugetlb.h>
+#include <linux/pgtable.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/mm_inline.h>
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+}
+
+pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
+{
+ pmd_t pmdval;
+
+ /* rcu_read_lock() to be added later */
+ pmdval = pmdp_get_lockless(pmd);
+ if (pmdvalp)
+ *pmdvalp = pmdval;
+ if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval)))
+ goto nomap;
+ if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval)))
+ goto nomap;
+ if (unlikely(pmd_bad(pmdval)))
+ {
+ pmd_clear_bad(pmd);
+ goto nomap;
+ }
+ return __pte_map(&pmdval, addr);
+nomap:
+ /* rcu_read_unlock() to be added later */
+ return NULL;
+}
+
+pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
+{
+ pmd_t pmdval;
+ pte_t *pte;
+
+ pte = __pte_offset_map(pmd, addr, &pmdval);
+ if (likely(pte))
+ *ptlp = pte_lockptr(mm, &pmdval);
+ return pte;
+}
+
+pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
+{
+ spinlock_t *ptl;
+ pmd_t pmdval;
+ pte_t *pte;
+again:
+ pte = __pte_offset_map(pmd, addr, &pmdval);
+ if (unlikely(!pte))
+ return pte;
+ ptl = pte_lockptr(mm, &pmdval);
+ spin_lock(ptl);
+ if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd))))
+ {
+ *ptlp = ptl;
+ return pte;
+ }
+ pte_unmap_unlock(pte, ptl);
+ goto again;
+}
diff --git a/hantro_afbc.c b/hantro_afbc.c
index 17fab85..78a859e 100644
--- a/hantro_afbc.c
+++ b/hantro_afbc.c
@@ -30,7 +30,7 @@
#include <linux/device.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
-#include <stddef.h>
+// #include <stddef.h>
#include <linux/platform_device.h>
#include <linux/dma-buf.h>
diff --git a/hantro_axife.c b/hantro_axife.c
index 40f5fcb..ce17924 100644
--- a/hantro_axife.c
+++ b/hantro_axife.c
@@ -35,7 +35,7 @@
#include <linux/device.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
-#include <stddef.h>
+// #include <stddef.h>
#include <linux/platform_device.h>
#include <linux/dma-buf.h>
#endif
diff --git a/pcie.c b/pcie.c
index 0fa708a..61742a7 100644
--- a/pcie.c
+++ b/pcie.c
@@ -753,11 +753,11 @@ int sn_pci_init(struct sn_tranx_t *tdev)
}
pci_set_master(tdev->pdev);
- if (pci_set_dma_mask(tdev->pdev, DMA_BIT_MASK(64)) ||
- pci_set_consistent_dma_mask(tdev->pdev, DMA_BIT_MASK(64))) {
+ if (dma_set_mask(&tdev->pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_coherent_mask(&tdev->pdev->dev, DMA_BIT_MASK(64))) {
sn_pri(tdev, SN_ERR, "pcie: Set pci dma mask 64 failed.\n");
- if (pci_set_dma_mask(tdev->pdev, DMA_BIT_MASK(32)) ||
- pci_set_consistent_dma_mask(tdev->pdev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask(&tdev->pdev->dev, DMA_BIT_MASK(32)) ||
+ dma_set_coherent_mask(&tdev->pdev->dev, DMA_BIT_MASK(32))) {
sn_pri(tdev, SN_ERR,
"pcie: No suitable DMA available.\n");
goto out_disable_pci;
diff --git a/reg_dump.c b/reg_dump.c
index 11c2bb8..46a732d 100644
--- a/reg_dump.c
+++ b/reg_dump.c
@@ -44,31 +44,20 @@ struct task_info {
static struct task_info task_dump = {0};
-static struct file *open_dump_file(char* string, mm_segment_t* fs)
+static struct file *open_dump_file(char* string)
{
struct file *filp = NULL;
filp = filp_open(string, O_RDWR | O_CREAT | O_APPEND, 0644);
if (IS_ERR(filp)) {
return NULL;
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
- *fs = get_fs();
- set_fs(KERNEL_DS);
-#else
- *fs = force_uaccess_begin();
-#endif
return filp;
}
-static int close_dump_file(struct file * filp, mm_segment_t* fs)
+static int close_dump_file(struct file * filp)
{
- if (filp == NULL || fs == NULL)
+ if (filp == NULL)
return -EFAULT;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
- set_fs(*fs);
-#else
- force_uaccess_end(*fs);
-#endif
filp_close(filp, NULL);
return 0;
}
@@ -105,7 +94,6 @@ int dump_ip_regs(struct sn_tranx_t *tdev, char *buf)
{
char path_name[64];
struct file *filp = NULL;
- mm_segment_t fs;
ktime_t k_time;
struct rtc_time tm;
struct reg_dump_t *dump = tdev->modules[SN_MODULE_REG_DUMP];
@@ -123,7 +111,7 @@ int dump_ip_regs(struct sn_tranx_t *tdev, char *buf)
tm = rtc_ktime_to_tm(k_time);
sprintf(path_name, "/var/log/reg-%s-%s-%02d%02d%02d.log", tdev->dev_name, ip_dumps[i].name, tm.tm_hour + 8, tm.tm_min, tm.tm_sec);
mutex_lock(&dump->reg_dump_lock);
- filp = open_dump_file(path_name, &fs);
+ filp = open_dump_file(path_name);
if (filp == NULL) {
sn_pri(tdev, SN_ERR, "%s: open file failed.\n", __func__);
mutex_unlock(&dump->reg_dump_lock);
@@ -137,7 +125,7 @@ int dump_ip_regs(struct sn_tranx_t *tdev, char *buf)
return ret;
}
- ret = close_dump_file(filp, &fs);
+ ret = close_dump_file(filp);
if (ret) {
sn_pri(tdev, SN_ERR, "%s: close file failed. ret = %d\n", __func__, ret);
mutex_unlock(&dump->reg_dump_lock);
diff --git a/sn_perf.h b/sn_perf.h
index 70ba7d0..84ca49a 100644
--- a/sn_perf.h
+++ b/sn_perf.h
@@ -10,6 +10,7 @@
#pragma once
#include "common.h"
+#include "sn_perf_types.h"
int sn_perf_init(struct sn_tranx_t* tdev);
void sn_perf_release(struct sn_tranx_t* tdev);
@@ -18,6 +19,6 @@ void sn_perf_close(struct sn_tranx_t* tdev, struct file* filp);
typedef void* SnPerfHandle;
typedef int (*sn_perf_callback_fn)(struct sn_tranx_t* tdev, __u32 ipId, __u32 cmd, __u32 arg);
-SnPerfHandle sn_perf_register(struct sn_tranx_t *tdev, const char* name, __u32 ipId, sn_perf_callback_fn callback);
+SnPerfHandle sn_perf_register(struct sn_tranx_t *tdev, const char* name, SN_PERF_IP_ID ipId, sn_perf_callback_fn callback);
void sn_perf_record(SnPerfHandle handle, sn_perf_event* event);
void sn_perf_load(SnPerfHandle handle, __u32 load);
\ No newline at end of file
diff --git a/transcoder.c b/transcoder.c
index 85786e1..9a5304c 100644
--- a/transcoder.c
+++ b/transcoder.c
@@ -309,8 +309,8 @@ static int trans_mmap(struct file *file, struct vm_area_struct *vma)
return -EFAULT;
}
- vma->vm_flags &= ~VM_IO;
- vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP);
+ vm_flags_clear(vma, VM_IO);
+ vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
vma->vm_page_prot) < 0) {
sn_pri(tdev, SN_ERR, "core: remap_pfn_range failed.\n");
diff --git a/vcmd/hantro_vcmd.c b/vcmd/hantro_vcmd.c
index 2c1454a..f928328 100644
--- a/vcmd/hantro_vcmd.c
+++ b/vcmd/hantro_vcmd.c
@@ -2285,7 +2285,7 @@ int hantrovcmd_release(struct sn_tranx_t *tdev, struct file *filp)
for (core_id = 0;core_id < vcmd_dev->total_vcmd_core_num; core_id++)
{
- if((&dev[core_id])==NULL || !dev[core_id].is_valid)
+ if(!dev[core_id].is_valid)
continue;
if (down_interruptible(&vcmd_dev->vcmd_reserve_cmdbuf_sem[dev[core_id].vcmd_core_cfg.sub_module_type]))
return -ERESTARTSYS;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment