From 00f3bad3bd4e9b4a60983857b805082750de30ed Mon Sep 17 00:00:00 2001
From: Kamillaova <me@kamillaova.dev>
Date: Sat, 6 Apr 2024 21:53:42 +0300
Subject: [PATCH] Driver of Intel(R) Gaussian & Neural Accelerator
Link: https://lore.kernel.org/dri-devel/20221020175334.1820519-1-maciej.kwapulinski@linux.intel.com
---
Documentation/gpu/drivers.rst | 1 +
Documentation/gpu/gna.rst | 64 +++++
MAINTAINERS | 7 +
drivers/gpu/drm/Kconfig | 2 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/gna/Kbuild | 5 +
drivers/gpu/drm/gna/Kconfig | 15 +
drivers/gpu/drm/gna/gna_device.c | 317 +++++++++++++++++++++
drivers/gpu/drm/gna/gna_device.h | 114 ++++++++
drivers/gpu/drm/gna/gna_gem.h | 22 ++
drivers/gpu/drm/gna/gna_hw.c | 110 ++++++++
drivers/gpu/drm/gna/gna_hw.h | 107 ++++++++
drivers/gpu/drm/gna/gna_ioctl.c | 208 ++++++++++++++
drivers/gpu/drm/gna/gna_mem.c | 249 +++++++++++++++++
drivers/gpu/drm/gna/gna_mem.h | 58 ++++
drivers/gpu/drm/gna/gna_pci.c | 148 ++++++++++
drivers/gpu/drm/gna/gna_pci.h | 12 +
drivers/gpu/drm/gna/gna_request.c | 441 ++++++++++++++++++++++++++++++
drivers/gpu/drm/gna/gna_request.h | 64 +++++
drivers/gpu/drm/gna/gna_score.c | 222 +++++++++++++++
drivers/gpu/drm/gna/gna_score.h | 11 +
include/uapi/drm/gna_drm.h | 169 ++++++++++++
22 files changed, 2347 insertions(+)
create mode 100644 Documentation/gpu/gna.rst
create mode 100644 drivers/gpu/drm/gna/Kbuild
create mode 100644 drivers/gpu/drm/gna/Kconfig
create mode 100644 drivers/gpu/drm/gna/gna_device.c
create mode 100644 drivers/gpu/drm/gna/gna_device.h
create mode 100644 drivers/gpu/drm/gna/gna_gem.h
create mode 100644 drivers/gpu/drm/gna/gna_hw.c
create mode 100644 drivers/gpu/drm/gna/gna_hw.h
create mode 100644 drivers/gpu/drm/gna/gna_ioctl.c
create mode 100644 drivers/gpu/drm/gna/gna_mem.c
create mode 100644 drivers/gpu/drm/gna/gna_mem.h
create mode 100644 drivers/gpu/drm/gna/gna_pci.c
create mode 100644 drivers/gpu/drm/gna/gna_pci.h
create mode 100644 drivers/gpu/drm/gna/gna_request.c
create mode 100644 drivers/gpu/drm/gna/gna_request.h
create mode 100644 drivers/gpu/drm/gna/gna_score.c
create mode 100644 drivers/gpu/drm/gna/gna_score.h
create mode 100644 include/uapi/drm/gna_drm.h
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index b899cbc5c2b4..1ca0ab0f50fa 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -6,6 +6,7 @@ GPU Driver Documentation
:maxdepth: 3
amdgpu/index
+ gna
i915
imagination/index
mcde
diff --git a/Documentation/gpu/gna.rst b/Documentation/gpu/gna.rst
new file mode 100644
index 000000000000..7f3b7ce7e8f7
--- /dev/null
+++ b/Documentation/gpu/gna.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================================
+Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)
+=====================================================
+
+Acronyms
+--------
+GNA - Gaussian & Neural Accelerator
+GMM - Gaussian Mixture Model
+CNN - Convolutional Neural Network
+RNN - Recurrent Neural Network
+DNN - Deep Neural Network
+
+Introduction
+------------
+The Intel(R) GNA is an internal, fixed PCI device available on several Intel platforms/SoCs.
+The feature set depends on the Intel chipset SKU.
+
+Intel(R) GNA provides hardware accelerated computation for GMMs and Neural Networks.
+It supports several layer types: affine, recurrent, and convolutional among others.
+The hardware also provides helper layer types for copying and transposing matrices.
+
+Linux Driver
+------------
+The driver registers a DRM render device to expose file operations via a device node.
+
+The driver probes/removes a PCI device, implements file operations, handles runtime
+power management, and interacts with hardware through MMIO registers.
+
+Multiple processes can independently submit requests to the driver. Requests are
+queued and processed in FIFO order; the hardware processes one request at a time.
+
+IOCTL
+-----
+The Intel(R) GNA driver controls the device through an IOCTL interface.
+The following IOCTL commands, handled by the DRM framework, are supported:
+
+GNA_GET_PARAMETER gets driver and device capabilities.
+
+GNA_GEM_NEW acquires a new 4KB page-aligned memory region ready for DMA operations.
+
+GNA_GEM_FREE frees a memory region back to the system.
+
+GNA_COMPUTE submits a request to the device queue.
+ Memory regions acquired by GNA_GEM_NEW are part of the request.
+
+GNA_WAIT blocks and waits on the submitted request.
+
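+A minimal user space sketch of this flow (allocate, map, compute, wait, free) is shown
+below. It is illustrative only: the union and field names follow the driver sources in
+this patch, while the exact ioctl request macros and structure layout are defined in
+include/uapi/drm/gna_drm.h and may differ; error handling is omitted::
+
+  #include <fcntl.h>
+  #include <stdint.h>
+  #include <sys/ioctl.h>
+  #include <sys/mman.h>
+  #include <unistd.h>
+
+  #include <drm/gna_drm.h>      /* assumed install location of the uAPI header */
+
+  int gna_infer_once(void)
+  {
+          int fd = open("/dev/dri/renderD128", O_RDWR); /* GNA render node; number may vary */
+
+          /* 1. GNA_GEM_NEW: allocate a page-aligned, DMA-able memory region. */
+          union gna_gem_new mem = { .in.size = 64 * 1024 };
+          ioctl(fd, DRM_IOCTL_GNA_GEM_NEW, &mem);       /* request macro name assumed */
+
+          /* 2. Map it through the DRM fake offset and fill in the model data. */
+          void *va = mmap(NULL, mem.out.size_granted, PROT_READ | PROT_WRITE,
+                          MAP_SHARED, fd, mem.out.vma_fake_offset);
+
+          /* 3. GNA_COMPUTE: submit a scoring request that references the region. */
+          struct gna_buffer buf = { .handle = mem.out.handle,
+                                    .size = mem.out.size_granted };
+          union gna_compute comp = { 0 };
+          comp.in.config.buffers_ptr = (uintptr_t)&buf;
+          comp.in.config.buffer_count = 1;
+          comp.in.config.layer_count = 1;
+          ioctl(fd, DRM_IOCTL_GNA_COMPUTE, &comp);
+
+          /* 4. GNA_WAIT: block until the request completes (timeout in ms). */
+          union gna_wait wait = { .in.request_id = comp.out.request_id,
+                                  .in.timeout = 1000 };
+          ioctl(fd, DRM_IOCTL_GNA_WAIT, &wait);
+
+          /* 5. GNA_GEM_FREE: release the memory region and its handle. */
+          struct gna_gem_free gfree = { .handle = mem.out.handle };
+          munmap(va, mem.out.size_granted);
+          ioctl(fd, DRM_IOCTL_GNA_GEM_FREE, &gfree);
+          close(fd);
+          return 0;
+  }
+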
+GNA MMU
+-------
+The GNA MMU is configured per request, based on the memory that the request uses. As
+the MMU can address up to 256MB, a single scoring request is limited to that amount of
+memory.
+
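+The 256MB limit follows from the MMU geometry used by this driver: up to 64 page
+tables, each holding PAGE_SIZE / 4 = 1024 page pointers, each pointing to a 4KB page
+(on x86), i.e. 64 * 1024 * 4KB = 256MB of addressable memory per request.
+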
+The GNA library can allocate any number of memory regions for GNA usage; their number
+and total capacity are limited only by the OS's resources. Due to the GNA MMU
+restriction, even when multiple memory regions are used, the sum of all memory regions
+used within a single inference request must be no larger than 256MB.
+
+At least one GNA memory region must be allocated (and it can be shared by multiple
+models). At the other extreme, each GNA tensor (e.g.,
+weights/biases/inputs/outputs) could use its own, separate GNA memory region.
diff --git a/MAINTAINERS b/MAINTAINERS
index 1aabf1c15bb3..1e91987281f4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10790,6 +10790,13 @@ S: Supported
F: drivers/infiniband/hw/irdma/
F: include/uapi/rdma/irdma-abi.h
+INTEL GNA PCI DRIVER
+M: Maciej Kwapulinski <maciej.kwapulinski@linux.intel.com>
+S: Maintained
+F: Documentation/gpu/gna.rst
+F: drivers/gpu/drm/gna/*
+F: include/uapi/drm/gna_drm.h
+
INTEL GPIO DRIVERS
M: Andy Shevchenko <andy@kernel.org>
L: linux-gpio@vger.kernel.org
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index c7edba18a6f0..15a1c7f1b310 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -400,6 +400,8 @@ source "drivers/gpu/drm/sprd/Kconfig"
source "drivers/gpu/drm/imagination/Kconfig"
+source "drivers/gpu/drm/gna/Kconfig"
+
config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
depends on DRM && PCI && MMU && HYPERV
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 104b42df2e95..44e633e9471d 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -187,5 +187,6 @@ obj-y += gud/
obj-$(CONFIG_DRM_HYPERV) += hyperv/
obj-y += solomon/
obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_GNA) += gna/
obj-$(CONFIG_DRM_LOONGSON) += loongson/
obj-$(CONFIG_DRM_POWERVR) += imagination/
diff --git a/drivers/gpu/drm/gna/Kbuild b/drivers/gpu/drm/gna/Kbuild
new file mode 100644
index 000000000000..d799c9530f79
--- /dev/null
+++ b/drivers/gpu/drm/gna/Kbuild
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+gna-y := gna_device.o gna_hw.o gna_ioctl.o gna_mem.o gna_pci.o gna_request.o gna_score.o
+
+obj-$(CONFIG_DRM_GNA) += gna.o
diff --git a/drivers/gpu/drm/gna/Kconfig b/drivers/gpu/drm/gna/Kconfig
new file mode 100644
index 000000000000..6c32716bf43a
--- /dev/null
+++ b/drivers/gpu/drm/gna/Kconfig
@@ -0,0 +1,15 @@
+#
+# Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)
+#
+
+config DRM_GNA
+ tristate "Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)"
+ depends on X86 && PCI
+ depends on DRM
+ select DRM_GEM_SHMEM_HELPER
+ help
+ This option enables the Intel(R) Gaussian & Neural Accelerator
+ (Intel(R) GNA) driver: gna.
+ The user space interface is defined in include/uapi/drm/gna_drm.h, and
+ information about the driver's functionality is in
+ Documentation/gpu/gna.rst.
diff --git a/drivers/gpu/drm/gna/gna_device.c b/drivers/gpu/drm/gna/gna_device.c
new file mode 100644
index 000000000000..4ce08bf313c3
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_device.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include <uapi/drm/gna_drm.h>
+
+#include "gna_device.h"
+#include "gna_gem.h"
+#include "gna_request.h"
+
+#define GNA_DDI_VERSION_CURRENT GNA_DDI_VERSION_3
+
+DEFINE_DRM_GEM_FOPS(gna_drm_fops);
+
+static const struct drm_ioctl_desc gna_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(GNA_GET_PARAMETER, gna_getparam_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(GNA_GEM_NEW, gna_gem_new_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(GNA_GEM_FREE, gna_gem_free_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(GNA_COMPUTE, gna_score_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(GNA_WAIT, gna_wait_ioctl, DRM_RENDER_ALLOW),
+};
+
+static int __maybe_unused gna_runtime_suspend(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct gna_device *gna_priv = to_gna_device(drm_dev);
+ u32 val = gna_reg_read(gna_priv, GNA_MMIO_D0I3C);
+
+ dev_dbg(dev, "%s D0I3, reg %.8x\n", __func__, val);
+
+ return 0;
+}
+
+static int __maybe_unused gna_runtime_resume(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct gna_device *gna_priv = to_gna_device(drm_dev);
+ u32 val = gna_reg_read(gna_priv, GNA_MMIO_D0I3C);
+
+ dev_dbg(dev, "%s D0I3, reg %.8x\n", __func__, val);
+
+ return 0;
+}
+
+const struct dev_pm_ops __maybe_unused gna_pm = {
+ SET_RUNTIME_PM_OPS(gna_runtime_suspend, gna_runtime_resume, NULL)
+};
+
+static int gna_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct gna_device *gna_priv;
+
+ gna_priv = to_gna_device(dev);
+
+ file->driver_priv = gna_priv;
+
+ return 0;
+}
+
+static void gna_delete_file_requests(struct drm_file *file, struct gna_device *gna_priv)
+{
+ struct gna_request *req, *temp_req;
+ struct list_head *reqs_list;
+
+ mutex_lock(&gna_priv->reqlist_lock);
+
+ reqs_list = &gna_priv->request_list;
+ if (!list_empty(reqs_list)) {
+ list_for_each_entry_safe(req, temp_req, reqs_list, node) {
+ if (req->drm_f == file) {
+ bool is_pending;
+
+ list_del_init(&req->node);
+ is_pending = cancel_work_sync(&req->work);
+ if (is_pending)
+ atomic_dec(&gna_priv->enqueued_requests);
+ kref_put(&req->refcount, gna_request_release);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&gna_priv->reqlist_lock);
+}
+
+static void gna_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct gna_device *gna_priv = (struct gna_device *)file->driver_priv;
+
+ gna_delete_file_requests(file, gna_priv);
+}
+
+static void gna_drm_dev_fini(struct drm_device *dev, void *ptr)
+{
+ drm_dev_unregister(dev);
+}
+
+static int gna_drm_dev_init(struct drm_device *dev)
+{
+ int err;
+
+ err = drm_dev_register(dev, 0);
+ if (err)
+ return err;
+
+ return drmm_add_action_or_reset(dev, gna_drm_dev_fini, NULL);
+}
+
+static void gna_pm_init(struct device *dev)
+{
+ pm_runtime_set_autosuspend_delay(dev, 2000);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_put_noidle(dev);
+}
+
+static void gna_pm_fini(struct drm_device *drm, void *data)
+{
+ struct device *dev = data;
+
+ pm_runtime_get_noresume(dev);
+}
+
+static irqreturn_t gna_interrupt(int irq, void *priv)
+{
+ struct gna_device *gna_priv;
+
+ gna_priv = (struct gna_device *)priv;
+ gna_priv->dev_busy = false;
+ wake_up(&gna_priv->dev_busy_waitq);
+ return IRQ_HANDLED;
+}
+
+static void gna_workqueue_fini(struct drm_device *drm, void *data)
+{
+ struct workqueue_struct *request_wq = data;
+
+ destroy_workqueue(request_wq);
+}
+
+static int gna_workqueue_init(struct gna_device *gna_priv)
+{
+ const char *name = gna_name(gna_priv);
+
+ gna_priv->request_wq = create_singlethread_workqueue(name);
+ if (!gna_priv->request_wq)
+ return -ENOMEM;
+
+ return drmm_add_action_or_reset(&gna_priv->drm, gna_workqueue_fini, gna_priv->request_wq);
+}
+
+static struct drm_gem_object *gna_create_gem_object(struct drm_device *dev,
+ size_t size)
+{
+ struct drm_gem_shmem_object *dshmem;
+ struct gna_gem_object *shmem;
+
+ shmem = kzalloc(sizeof(*shmem), GFP_KERNEL);
+ if (!shmem)
+ return NULL;
+
+ dshmem = &shmem->base;
+
+ return &dshmem->base;
+}
+
+static const struct drm_driver gna_drm_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_RENDER,
+ .open = gna_open,
+ .postclose = gna_close,
+
+ .gem_create_object = gna_create_gem_object,
+
+ .ioctls = gna_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(gna_drm_ioctls),
+ .fops = &gna_drm_fops,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq)
+{
+ struct gna_device *gna_priv;
+ struct drm_device *drm_dev;
+ u32 bld_reg;
+ int err;
+
+ gna_priv = devm_drm_dev_alloc(parent, &gna_drm_driver, struct gna_device, drm);
+ if (IS_ERR(gna_priv))
+ return PTR_ERR(gna_priv);
+
+ drm_dev = &gna_priv->drm;
+ gna_priv->recovery_timeout_jiffies = msecs_to_jiffies(60*1000);
+ gna_priv->iobase = iobase;
+ gna_priv->info = *dev_info;
+
+ atomic_set(&gna_priv->enqueued_requests, 0);
+
+ if (!(sizeof(dma_addr_t) > 4) ||
+ dma_set_mask(parent, DMA_BIT_MASK(64))) {
+ err = dma_set_mask(parent, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+ }
+
+ bld_reg = gna_reg_read(gna_priv, GNA_MMIO_IBUFFS);
+ gna_priv->hw_info.in_buf_s = bld_reg & GENMASK(7, 0);
+
+ err = gna_mmu_init(gna_priv);
+ if (err)
+ return err;
+
+ dev_dbg(parent, "maximum memory size %llu num pd %d\n",
+ gna_priv->info.max_hw_mem, gna_priv->info.num_pagetables);
+ dev_dbg(parent, "desc rsvd size %d mmu vamax size %d\n",
+ gna_priv->info.desc_info.rsvd_size,
+ gna_priv->info.desc_info.mmu_info.vamax_size);
+
+ mutex_init(&gna_priv->mmu_lock);
+
+ atomic_set(&gna_priv->request_count, 0);
+
+ mutex_init(&gna_priv->reqlist_lock);
+ INIT_LIST_HEAD(&gna_priv->request_list);
+
+ init_waitqueue_head(&gna_priv->dev_busy_waitq);
+
+ err = gna_workqueue_init(gna_priv);
+ if (err)
+ return err;
+
+ err = devm_request_irq(parent, irq, gna_interrupt,
+ IRQF_SHARED, gna_name(gna_priv), gna_priv);
+ if (err)
+ return err;
+
+ dev_set_drvdata(parent, drm_dev);
+
+ err = gna_drm_dev_init(drm_dev);
+ if (err)
+ return err;
+
+ gna_pm_init(parent);
+ err = drmm_add_action(drm_dev, gna_pm_fini, parent);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static u32 gna_device_type_by_hwid(u32 hwid)
+{
+ switch (hwid) {
+ case GNA_DEV_HWID_CNL:
+ return GNA_DEV_TYPE_0_9;
+ case GNA_DEV_HWID_GLK:
+ case GNA_DEV_HWID_EHL:
+ case GNA_DEV_HWID_ICL:
+ return GNA_DEV_TYPE_1_0;
+ case GNA_DEV_HWID_JSL:
+ case GNA_DEV_HWID_TGL:
+ case GNA_DEV_HWID_RKL:
+ return GNA_DEV_TYPE_2_0;
+ case GNA_DEV_HWID_ADL:
+ case GNA_DEV_HWID_RPL:
+ return GNA_DEV_TYPE_3_0;
+ case GNA_DEV_HWID_MTL:
+ return GNA_DEV_TYPE_3_5;
+ default:
+ return 0;
+ }
+}
+
+int gna_getparam(struct gna_device *gna_priv, union gna_parameter *param)
+{
+ switch (param->in.id) {
+ case GNA_PARAM_RECOVERY_TIMEOUT:
+ param->out.value = jiffies_to_msecs(gna_priv->recovery_timeout_jiffies) / 1000;
+ break;
+ case GNA_PARAM_INPUT_BUFFER_S:
+ param->out.value = gna_priv->hw_info.in_buf_s;
+ break;
+ case GNA_PARAM_DEVICE_TYPE:
+ param->out.value = gna_device_type_by_hwid(gna_priv->info.hwid);
+ break;
+ case GNA_PARAM_DDI_VERSION:
+ param->out.value = GNA_DDI_VERSION_CURRENT;
+ break;
+ default:
+ dev_dbg(gna_dev(gna_priv), "unknown parameter id: %llu\n", param->in.id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA) Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/gna/gna_device.h b/drivers/gpu/drm/gna/gna_device.h
new file mode 100644
index 000000000000..6eae0f2f44df
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_device.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_DEVICE_H__
+#define __GNA_DEVICE_H__
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+
+#include <linux/atomic.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/types.h>
+
+#include "gna_gem.h"
+#include "gna_hw.h"
+#include "gna_mem.h"
+
+#define DRIVER_NAME "gna"
+#define DRIVER_DESC "Intel(R) Gaussian & Neural Accelerator (Intel(R) GNA)"
+#define DRIVER_DATE "20211201"
+
+#define DRIVER_MAJOR 1
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+struct workqueue_struct;
+union gna_parameter;
+struct drm_file;
+struct device;
+
+struct gna_device {
+ struct drm_device drm;
+
+ int recovery_timeout_jiffies;
+
+ /* hardware status set by interrupt handler */
+ u32 hw_status;
+
+ /* device related resources */
+ void __iomem *iobase;
+ struct gna_dev_info info;
+ struct gna_hw_info hw_info;
+
+ struct gna_mmu_object mmu;
+ struct mutex mmu_lock;
+
+ /* if true, then gna device is processing */
+ bool dev_busy;
+ struct wait_queue_head dev_busy_waitq;
+
+ struct list_head request_list;
+ /* protects request_list */
+ struct mutex reqlist_lock;
+ struct workqueue_struct *request_wq;
+ atomic_t request_count;
+
+ /* requests queued to run, plus one for the request currently being processed */
+ atomic_t enqueued_requests;
+};
+
+int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq);
+int gna_getparam(struct gna_device *gna_priv, union gna_parameter *param);
+
+int gna_getparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+int gna_gem_new_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+int gna_gem_free_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+int gna_score_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+int gna_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+extern const struct dev_pm_ops __maybe_unused gna_pm;
+
+static inline u32 gna_reg_read(struct gna_device *gna_priv, u32 reg)
+{
+ return readl(gna_priv->iobase + reg);
+}
+
+static inline void gna_reg_write(struct gna_device *gna_priv, u32 reg, u32 val)
+{
+ writel(val, gna_priv->iobase + reg);
+}
+
+static inline const char *gna_name(struct gna_device *gna_priv)
+{
+ return gna_priv->drm.unique;
+}
+
+static inline struct device *gna_dev(struct gna_device *gna_priv)
+{
+ return gna_priv->drm.dev;
+}
+
+static inline struct gna_device *to_gna_device(struct drm_device *dev)
+{
+ return container_of(dev, struct gna_device, drm);
+}
+
+static inline struct gna_gem_object *to_gna_gem_obj(struct drm_gem_shmem_object *drm_gem_shmem)
+{
+ return container_of(drm_gem_shmem, struct gna_gem_object, base);
+}
+
+#endif /* __GNA_DEVICE_H__ */
diff --git a/drivers/gpu/drm/gna/gna_gem.h b/drivers/gpu/drm/gna/gna_gem.h
new file mode 100644
index 000000000000..92372fc93718
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_gem.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_GEM_H__
+#define __GNA_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+struct gna_gem_object {
+ struct drm_gem_shmem_object base;
+
+ uint32_t handle;
+
+ struct work_struct work;
+
+ struct wait_queue_head waitq;
+};
+
+#endif /* __GNA_GEM_H__ */
diff --git a/drivers/gpu/drm/gna/gna_hw.c b/drivers/gpu/drm/gna/gna_hw.c
new file mode 100644
index 000000000000..dff7c6b3edea
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_hw.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+
+#include <uapi/drm/gna_drm.h>
+
+#include "gna_device.h"
+#include "gna_hw.h"
+
+int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status)
+{
+ if (hw_status & GNA_ERROR) {
+ dev_dbg(gna_dev(gna_priv), "GNA completed with errors: %#x\n", hw_status);
+ return -EIO;
+ }
+
+ if (hw_status & GNA_STS_SCORE_COMPLETED) {
+ dev_dbg(gna_dev(gna_priv), "GNA completed successfully: %#x\n", hw_status);
+ return 0;
+ }
+
+ dev_dbg(gna_dev(gna_priv), "GNA not completed, status: %#x\n", hw_status);
+ return -ENODATA;
+}
+
+void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status)
+{
+ if (hw_status & GNA_STS_PARAM_OOR)
+ dev_dbg(gna_dev(gna_priv), "GNA error: Param Out Range Error\n");
+
+ if (hw_status & GNA_STS_VA_OOR)
+ dev_dbg(gna_dev(gna_priv), "GNA error: VA Out of Range Error\n");
+
+ if (hw_status & GNA_STS_PCI_MMU_ERR)
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI MMU Error\n");
+
+ if (hw_status & GNA_STS_PCI_DMA_ERR)
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI MMU Error\n");
+
+ if (hw_status & GNA_STS_PCI_UNEXCOMPL_ERR)
+ dev_dbg(gna_dev(gna_priv), "GNA error: PCI Unexpected Completion Error\n");
+
+ if (hw_status & GNA_STS_SATURATE)
+ dev_dbg(gna_dev(gna_priv), "GNA error: Saturation Reached !\n");
+}
+
+bool gna_hw_perf_enabled(struct gna_device *gna_priv)
+{
+ u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL);
+
+ return !!FIELD_GET(GNA_CTRL_COMP_STATS_EN, ctrl);
+}
+
+void gna_start_scoring(struct gna_device *gna_priv,
+ struct gna_compute_cfg *compute_cfg)
+{
+ u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL);
+
+ ctrl |= GNA_CTRL_START_ACCEL | GNA_CTRL_COMP_INT_EN | GNA_CTRL_ERR_INT_EN;
+
+ ctrl &= ~GNA_CTRL_COMP_STATS_EN;
+ ctrl |= FIELD_PREP(GNA_CTRL_COMP_STATS_EN,
+ compute_cfg->hw_perf_encoding & FIELD_MAX(GNA_CTRL_COMP_STATS_EN));
+
+ ctrl &= ~GNA_CTRL_ACTIVE_LIST_EN;
+ ctrl |= FIELD_PREP(GNA_CTRL_ACTIVE_LIST_EN,
+ compute_cfg->active_list_on & FIELD_MAX(GNA_CTRL_ACTIVE_LIST_EN));
+
+ ctrl &= ~GNA_CTRL_OP_MODE;
+ ctrl |= FIELD_PREP(GNA_CTRL_OP_MODE,
+ compute_cfg->gna_mode & FIELD_MAX(GNA_CTRL_OP_MODE));
+
+ gna_reg_write(gna_priv, GNA_MMIO_CTRL, ctrl);
+}
+
+static void gna_clear_saturation(struct gna_device *gna_priv)
+{
+ u32 val;
+
+ val = gna_reg_read(gna_priv, GNA_MMIO_STS);
+ if (val & GNA_STS_SATURATE) {
+ dev_dbg(gna_dev(gna_priv), "status (saturation): %#x\n", val);
+
+ val = val & GNA_STS_SATURATE;
+ gna_reg_write(gna_priv, GNA_MMIO_STS, val);
+ }
+}
+
+int gna_abort_hw(struct gna_device *gna_priv)
+{
+ u32 val;
+
+ /* saturation bit in the GNA status register needs
+ * to be explicitly cleared.
+ */
+ gna_clear_saturation(gna_priv);
+
+ val = gna_reg_read(gna_priv, GNA_MMIO_STS);
+ dev_dbg(gna_dev(gna_priv), "status (before abort): %#x\n", val);
+
+ val = gna_reg_read(gna_priv, GNA_MMIO_CTRL);
+ val |= GNA_CTRL_ABORT_CLR_ACCEL;
+ gna_reg_write(gna_priv, GNA_MMIO_CTRL, val);
+
+ return readl_poll_timeout(gna_priv->iobase + GNA_MMIO_STS, val,
+ !(val & 0x1),
+ 0, 1000);
+}
diff --git a/drivers/gpu/drm/gna/gna_hw.h b/drivers/gpu/drm/gna/gna_hw.h
new file mode 100644
index 000000000000..97338e1be3b6
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_hw.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_HW_H__
+#define __GNA_HW_H__
+
+#include <linux/bits.h>
+#include <linux/mm_types.h>
+
+struct gna_compute_cfg;
+struct gna_device;
+
+#define GNA_FEATURES \
+ .max_hw_mem = 256 * 1024 * 1024, \
+ .num_pagetables = 64, \
+ .num_page_entries = PAGE_SIZE / sizeof(u32), \
+ /* desc_info all in bytes */ \
+ .desc_info = { \
+ .rsvd_size = 256, \
+ .cfg_size = 256, \
+ .desc_size = 784, \
+ .mmu_info = { \
+ .vamax_size = 4, \
+ .rsvd_size = 12, \
+ .pd_size = 4 * 64, \
+ }, \
+ }
+
+#define GNA_GEN1_FEATURES \
+ GNA_FEATURES, \
+ .max_layer_count = 1024
+
+#define GNA_GEN2_FEATURES \
+ GNA_FEATURES, \
+ .max_layer_count = 4096
+
+#define GNA_DEV_HWID_CNL 0x5A11
+#define GNA_DEV_HWID_EHL 0x4511
+#define GNA_DEV_HWID_GLK 0x3190
+#define GNA_DEV_HWID_ICL 0x8A11
+#define GNA_DEV_HWID_JSL 0x4E11
+#define GNA_DEV_HWID_TGL 0x9A11
+#define GNA_DEV_HWID_RKL 0x4C11
+#define GNA_DEV_HWID_ADL 0x464F
+#define GNA_DEV_HWID_RPL 0xA74F
+#define GNA_DEV_HWID_MTL 0x7E4C
+
+/* GNA MMIO registers */
+#define GNA_MMIO_STS 0x80
+#define GNA_MMIO_CTRL 0x84
+#define GNA_MMIO_PTC 0x8C
+#define GNA_MMIO_PSC 0x90
+#define GNA_MMIO_D0I3C 0xA8
+#define GNA_MMIO_DESBASE 0xB0
+#define GNA_MMIO_IBUFFS 0xB4
+
+#define GNA_PT_ENTRY_SIZE 4
+/* there are up to 1024 32-bit pointers in one page in Page Table (L1) */
+#define GNA_PT_LENGTH (PAGE_SIZE / GNA_PT_ENTRY_SIZE)
+
+#define GNA_PGDIRN_LEN 64
+#define GNA_PGDIR_ENTRIES 1024 /* 32-bit page addresses */
+#define GNA_PGDIR_INVALID 1
+
+#define GNA_CTRL_START_ACCEL BIT(0)
+#define GNA_CTRL_ACTIVE_LIST_EN BIT(1)
+#define GNA_CTRL_ABORT_CLR_ACCEL BIT(2)
+#define GNA_CTRL_OP_MODE GENMASK(6, 5)
+#define GNA_CTRL_COMP_INT_EN BIT(8)
+#define GNA_CTRL_ERR_INT_EN BIT(10)
+#define GNA_CTRL_COMP_STATS_EN GENMASK(15, 12)
+
+struct gna_mmu_info {
+ u32 vamax_size;
+ u32 rsvd_size;
+ u32 pd_size;
+};
+
+struct gna_desc_info {
+ u32 rsvd_size;
+ u32 cfg_size;
+ u32 desc_size;
+ struct gna_mmu_info mmu_info;
+};
+
+struct gna_hw_info {
+ u8 in_buf_s;
+};
+
+struct gna_dev_info {
+ u32 hwid;
+ u32 num_pagetables;
+ u32 num_page_entries;
+ u32 max_layer_count;
+ u64 max_hw_mem;
+
+ struct gna_desc_info desc_info;
+};
+
+int gna_abort_hw(struct gna_device *gna_priv);
+bool gna_hw_perf_enabled(struct gna_device *gna_priv);
+int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status);
+void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status);
+void gna_start_scoring(struct gna_device *gna_priv,
+ struct gna_compute_cfg *compute_cfg);
+
+#endif // __GNA_HW_H__
diff --git a/drivers/gpu/drm/gna/gna_ioctl.c b/drivers/gpu/drm/gna/gna_ioctl.c
new file mode 100644
index 000000000000..f3c805e946ee
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_ioctl.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_file.h>
+
+#include <linux/jiffies.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include <uapi/drm/gna_drm.h>
+
+#include "gna_device.h"
+#include "gna_gem.h"
+#include "gna_request.h"
+
+int gna_score_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ union gna_compute *score_args = data;
+ u64 request_id;
+ int ret;
+
+ ret = gna_validate_score_config(&score_args->in.config, to_gna_device(dev));
+ if (ret)
+ return ret;
+
+ ret = gna_enqueue_request(&score_args->in.config, file, &request_id);
+ if (ret)
+ return ret;
+
+ score_args->out.request_id = request_id;
+
+ return 0;
+}
+
+static struct gna_request *gna_find_request_by_id(u64 req_id, struct gna_device *gna_priv)
+{
+ struct gna_request *req, *found_req;
+ struct list_head *reqs_list;
+
+ mutex_lock(&gna_priv->reqlist_lock);
+
+ reqs_list = &gna_priv->request_list;
+ found_req = NULL;
+ if (!list_empty(reqs_list)) {
+ list_for_each_entry(req, reqs_list, node) {
+ if (req_id == req->request_id) {
+ found_req = req;
+ kref_get(&found_req->refcount);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&gna_priv->reqlist_lock);
+
+ return found_req;
+}
+
+int gna_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct gna_device *gna_priv = to_gna_device(dev);
+ union gna_wait *wait_data = data;
+ struct gna_request *score_request;
+ u64 request_id;
+ u32 timeout;
+ int ret = 0;
+
+ request_id = wait_data->in.request_id;
+ timeout = wait_data->in.timeout;
+
+ score_request = gna_find_request_by_id(request_id, gna_priv);
+
+ if (!score_request) {
+ dev_dbg(gna_dev(gna_priv), "could not find request, id: %llu\n", request_id);
+ return -EINVAL;
+ }
+
+ if (score_request->drm_f != file) {
+ dev_dbg(gna_dev(gna_priv), "illegal file_priv: %p != %p\n", score_request->drm_f, file);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = wait_event_interruptible_timeout(score_request->waitq, score_request->state == DONE,
+ msecs_to_jiffies(timeout));
+ if (ret == 0 || ret == -ERESTARTSYS) {
+ dev_dbg(gna_dev(gna_priv), "request timed out, id: %llu\n", request_id);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ wait_data->out.hw_perf = score_request->hw_perf;
+ wait_data->out.drv_perf = score_request->drv_perf;
+ wait_data->out.hw_status = score_request->hw_status;
+
+ ret = score_request->status;
+
+ dev_dbg(gna_dev(gna_priv), "request status: %d, hw status: %#x\n",
+ score_request->status, score_request->hw_status);
+
+ cancel_work_sync(&score_request->work);
+ mutex_lock(&gna_priv->reqlist_lock);
+ if (!list_empty(&score_request->node)) {
+ list_del_init(&score_request->node);
+ kref_put(&score_request->refcount, gna_request_release); // due to gna_priv->request_list removal!
+ }
+ mutex_unlock(&gna_priv->reqlist_lock);
+
+out:
+ kref_put(&score_request->refcount, gna_request_release);
+ return ret;
+}
+
+int gna_gem_free_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct gna_device *gna_priv = to_gna_device(dev);
+ struct gna_gem_free *args = data;
+ struct gna_gem_object *gnagemo;
+ struct drm_gem_object *drmgemo;
+ int ret;
+
+ drmgemo = drm_gem_object_lookup(file, args->handle);
+ if (!drmgemo)
+ return -ENOENT;
+
+ gnagemo = to_gna_gem_obj(to_drm_gem_shmem_obj(drmgemo));
+
+ queue_work(gna_priv->request_wq, &gnagemo->work);
+ if (wait_event_interruptible(gnagemo->waitq, true)) {
+ ret = -ERESTARTSYS;
+ goto out;
+ }
+
+ cancel_work_sync(&gnagemo->work);
+
+ ret = drm_gem_handle_delete(file, args->handle);
+
+out:
+ drm_gem_object_put(drmgemo);
+ return ret;
+}
+
+int gna_getparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct gna_device *gna_priv = to_gna_device(dev);
+ union gna_parameter *param = data;
+
+ return gna_getparam(gna_priv, param);
+}
+
+static struct drm_gem_shmem_object *
+drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
+ struct drm_device *dev, size_t size,
+ uint32_t *handle)
+{
+ struct drm_gem_shmem_object *shmem;
+ int ret;
+
+ shmem = drm_gem_shmem_create(dev, size);
+ if (IS_ERR(shmem))
+ return shmem;
+
+ /*
+ * Allocate an id of idr table where the obj is registered
+ * and handle has the id what user can see.
+ */
+ ret = drm_gem_handle_create(file_priv, &shmem->base, handle);
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_put(&shmem->base);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return shmem;
+}
+
+int gna_gem_new_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_gem_shmem_object *drmgemshm;
+ struct gna_gem_object *gnagemo;
+ union gna_gem_new *args = data;
+
+ drmgemshm = drm_gem_shmem_create_with_handle(file, dev, args->in.size,
+ &args->out.handle);
+
+ if (IS_ERR(drmgemshm))
+ return PTR_ERR(drmgemshm);
+
+ args->out.size_granted = drmgemshm->base.size;
+ args->out.vma_fake_offset = drm_vma_node_offset_addr(&drmgemshm->base.vma_node);
+
+ gnagemo = to_gna_gem_obj(drmgemshm);
+ gnagemo->handle = args->out.handle;
+
+ INIT_WORK(&gnagemo->work, gna_gem_obj_release_work);
+ init_waitqueue_head(&gnagemo->waitq);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/gna/gna_mem.c b/drivers/gpu/drm/gna/gna_mem.c
new file mode 100644
index 000000000000..bf57302fbc02
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_mem.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/math.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "gna_device.h"
+#include "gna_gem.h"
+#include "gna_mem.h"
+#include "gna_request.h"
+
+static void gna_mmu_set(struct gna_device *gna_priv)
+{
+ struct gna_mmu_object *mmu;
+ dma_addr_t pagetable_dma;
+ u32 *pgdirn;
+ int i;
+
+ mmu = &gna_priv->mmu;
+
+ pgdirn = mmu->hwdesc->mmu.pagedir_n;
+
+ for (i = 0; i < mmu->num_pagetables; i++) {
+ pagetable_dma = mmu->pagetables_dma[i];
+ pgdirn[i] = pagetable_dma >> PAGE_SHIFT;
+ }
+
+ for (; i < GNA_PGDIRN_LEN; i++)
+ pgdirn[i] = GNA_PGDIR_INVALID;
+}
+
+/* descriptor and page tables allocation */
+int gna_mmu_init(struct gna_device *gna_priv)
+{
+ struct device *parent = gna_dev(gna_priv);
+ struct gna_mmu_object *mmu;
+ int desc_size;
+ int i;
+
+ if (gna_priv->info.num_pagetables > GNA_PGDIRN_LEN) {
+ dev_dbg(gna_dev(gna_priv), "number of pagetables requested too large: %u\n", gna_priv->info.num_pagetables);
+ return -EINVAL;
+ }
+
+ mmu = &gna_priv->mmu;
+
+ desc_size = round_up(gna_priv->info.desc_info.desc_size, PAGE_SIZE);
+
+ mmu->hwdesc = dmam_alloc_coherent(parent, desc_size, &mmu->hwdesc_dma,
+ GFP_KERNEL);
+ if (!mmu->hwdesc)
+ return -ENOMEM;
+
+ mmu->num_pagetables = gna_priv->info.num_pagetables;
+
+ mmu->pagetables_dma = drmm_kmalloc_array(&gna_priv->drm, mmu->num_pagetables, sizeof(*mmu->pagetables_dma),
+ GFP_KERNEL);
+ if (!mmu->pagetables_dma)
+ return -ENOMEM;
+
+ mmu->pagetables = drmm_kmalloc_array(&gna_priv->drm, mmu->num_pagetables, sizeof(*mmu->pagetables), GFP_KERNEL);
+
+ if (!mmu->pagetables)
+ return -ENOMEM;
+
+ for (i = 0; i < mmu->num_pagetables; i++) {
+ mmu->pagetables[i] = dmam_alloc_coherent(parent, PAGE_SIZE,
+ &mmu->pagetables_dma[i], GFP_KERNEL);
+ if (!mmu->pagetables[i])
+ return -ENOMEM;
+ }
+
+ gna_mmu_set(gna_priv);
+
+ return 0;
+}
+
+static struct scatterlist *gna_iterate_sgl(u64 sg_elems, struct scatterlist *sgl, dma_addr_t *sg_page,
+ int *sg_page_len, int *sg_pages)
+{
+ while (sg_elems-- > 0) {
+ (*sg_page) += PAGE_SIZE;
+ (*sg_pages)++;
+ if (*sg_pages == *sg_page_len) {
+ sgl = sg_next(sgl);
+ if (!sgl)
+ break;
+
+ *sg_page = sg_dma_address(sgl);
+ *sg_page_len =
+ round_up(sg_dma_len(sgl), PAGE_SIZE)
+ >> PAGE_SHIFT;
+ *sg_pages = 0;
+ }
+ }
+
+ return sgl;
+}
+
+
+void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo)
+{
+ struct gna_mmu_object *mmu;
+ struct scatterlist *sgl;
+ dma_addr_t sg_page;
+ int sg_page_len;
+ u32 *pagetable;
+ u32 mmu_page;
+ int sg_pages;
+ int i;
+ int j;
+
+ mmu = &gna_priv->mmu;
+ mutex_lock(&gna_priv->mmu_lock);
+
+ j = mmu->filled_pages;
+ sgl = drmshmemo->sgt->sgl;
+
+ if (!sgl) {
+ dev_warn(gna_dev(gna_priv), "empty scatter list in memory object\n");
+ goto warn_empty_sgl;
+ }
+ sg_page = sg_dma_address(sgl);
+ sg_page_len = round_up(sg_dma_len(sgl), PAGE_SIZE) >> PAGE_SHIFT;
+ sg_pages = 0;
+
+ for (i = mmu->filled_pts; i < mmu->num_pagetables; i++) {
+ if (!sgl)
+ break;
+
+ pagetable = mmu->pagetables[i];
+
+ for (j = mmu->filled_pages; j < GNA_PT_LENGTH; j++) {
+ mmu_page = sg_page >> PAGE_SHIFT;
+ pagetable[j] = mmu_page;
+
+ mmu->filled_pages++;
+
+ sgl = gna_iterate_sgl(1, sgl, &sg_page, &sg_page_len,
+ &sg_pages);
+ if (!sgl)
+ break;
+ }
+
+ if (j == GNA_PT_LENGTH) {
+ mmu->filled_pages = 0;
+ mmu->filled_pts++;
+ }
+ }
+
+ mmu->hwdesc->mmu.vamaxaddr =
+ (mmu->filled_pts * PAGE_SIZE * GNA_PGDIR_ENTRIES) +
+ (mmu->filled_pages * PAGE_SIZE) - 1;
+ dev_dbg(gna_dev(gna_priv), "vamaxaddr: %u\n", mmu->hwdesc->mmu.vamaxaddr);
+
+warn_empty_sgl:
+ mutex_unlock(&gna_priv->mmu_lock);
+}
+
+void gna_mmu_clear(struct gna_device *gna_priv)
+{
+ struct gna_mmu_object *mmu;
+ int i;
+
+ mmu = &gna_priv->mmu;
+ mutex_lock(&gna_priv->mmu_lock);
+
+ for (i = 0; i < mmu->filled_pts; i++)
+ memset(mmu->pagetables[i], 0, PAGE_SIZE);
+
+ if (mmu->filled_pages > 0)
+ memset(mmu->pagetables[mmu->filled_pts], 0, mmu->filled_pages * GNA_PT_ENTRY_SIZE);
+
+ mmu->filled_pts = 0;
+ mmu->filled_pages = 0;
+ mmu->hwdesc->mmu.vamaxaddr = 0;
+
+ mutex_unlock(&gna_priv->mmu_lock);
+}
+
+bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gnagemo)
+{
+ struct drm_gem_shmem_object *shmem = &gnagemo->base;
+ struct drm_gem_object *drmgemo = &shmem->base;
+
+ if (!dma_resv_trylock(shmem->base.resv))
+ return false;
+ dma_unmap_sgtable(drmgemo->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(shmem->sgt);
+ kfree(shmem->sgt);
+ shmem->sgt = NULL;
+ dma_resv_unlock(shmem->base.resv);
+
+ drm_gem_shmem_put_pages(shmem);
+
+ return true;
+}
+
+static void gna_delete_score_requests(u32 handle, struct gna_device *gna_priv)
+{
+ struct gna_request *req, *temp_req;
+ struct list_head *reqs_list;
+ int i;
+
+ mutex_lock(&gna_priv->reqlist_lock);
+
+ reqs_list = &gna_priv->request_list;
+ if (!list_empty(reqs_list)) {
+ list_for_each_entry_safe(req, temp_req, reqs_list, node) {
+ for (i = 0; i < req->buffer_count; ++i) {
+ if (req->buffer_list[i].gna.handle == handle) {
+ list_del_init(&req->node);
+ cancel_work_sync(&req->work);
+ atomic_dec(&gna_priv->enqueued_requests);
+ kref_put(&req->refcount, gna_request_release);
+ break;
+ }
+ }
+ }
+ }
+
+ mutex_unlock(&gna_priv->reqlist_lock);
+}
+
+void gna_gem_obj_release_work(struct work_struct *work)
+{
+ struct gna_gem_object *gnagemo;
+
+ gnagemo = container_of(work, struct gna_gem_object, work);
+
+ gna_delete_score_requests(gnagemo->handle, to_gna_device(gnagemo->base.base.dev));
+
+ wake_up_interruptible(&gnagemo->waitq);
+}
diff --git a/drivers/gpu/drm/gna/gna_mem.h b/drivers/gpu/drm/gna/gna_mem.h
new file mode 100644
index 000000000000..92193f9d608d
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_mem.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_MEM_H__
+#define __GNA_MEM_H__
+
+#include <linux/types.h>
+
+#include "gna_hw.h"
+
+struct gna_gem_object;
+struct work_struct;
+struct gna_device;
+
+struct gna_xnn_descriptor {
+ u32 labase;
+ u16 lacount;
+ u16 _rsvd;
+};
+
+struct gna_mmu {
+ u32 vamaxaddr;
+ u8 __res_204[12];
+ u32 pagedir_n[GNA_PGDIRN_LEN];
+};
+
+struct gna_hw_descriptor {
+ u8 __res_0000[256];
+ struct gna_xnn_descriptor xnn_config;
+ u8 __unused[248];
+ struct gna_mmu mmu;
+};
+
+struct gna_mmu_object {
+ struct gna_hw_descriptor *hwdesc;
+
+ dma_addr_t hwdesc_dma;
+
+ u32 **pagetables;
+ dma_addr_t *pagetables_dma;
+
+ u32 num_pagetables;
+
+ u32 filled_pts;
+ u32 filled_pages;
+};
+
+int gna_mmu_init(struct gna_device *gna_priv);
+
+void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo);
+
+void gna_mmu_clear(struct gna_device *gna_priv);
+
+bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gna_obj);
+
+void gna_gem_obj_release_work(struct work_struct *work);
+
+#endif // __GNA_MEM_H__
diff --git a/drivers/gpu/drm/gna/gna_pci.c b/drivers/gpu/drm/gna/gna_pci.c
new file mode 100644
index 000000000000..eaae42142bfb
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_pci.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "gna_device.h"
+#include "gna_hw.h"
+#include "gna_pci.h"
+
+static const struct gna_dev_info cnl_dev_info = {
+ .hwid = GNA_DEV_HWID_CNL,
+ GNA_GEN1_FEATURES
+};
+
+static const struct gna_dev_info glk_dev_info = {
+ .hwid = GNA_DEV_HWID_GLK,
+ GNA_GEN1_FEATURES
+};
+
+static const struct gna_dev_info ehl_dev_info = {
+ .hwid = GNA_DEV_HWID_EHL,
+ GNA_GEN1_FEATURES
+};
+
+static const struct gna_dev_info icl_dev_info = {
+ .hwid = GNA_DEV_HWID_ICL,
+ GNA_GEN1_FEATURES
+};
+
+static const struct gna_dev_info jsl_dev_info = {
+ .hwid = GNA_DEV_HWID_JSL,
+ GNA_GEN2_FEATURES
+};
+
+static const struct gna_dev_info tgl_dev_info = {
+ .hwid = GNA_DEV_HWID_TGL,
+ GNA_GEN2_FEATURES
+};
+
+static const struct gna_dev_info rkl_dev_info = {
+ .hwid = GNA_DEV_HWID_RKL,
+ GNA_GEN2_FEATURES
+};
+
+static const struct gna_dev_info adl_dev_info = {
+ .hwid = GNA_DEV_HWID_ADL,
+ GNA_GEN2_FEATURES
+};
+
+static const struct gna_dev_info rpl_dev_info = {
+ .hwid = GNA_DEV_HWID_RPL,
+ GNA_GEN2_FEATURES
+};
+
+static const struct gna_dev_info mtl_dev_info = {
+ .hwid = GNA_DEV_HWID_MTL,
+ GNA_GEN2_FEATURES
+};
+
+#define INTEL_GNA_DEVICE(hwid, info) \
+ { PCI_VDEVICE(INTEL, hwid), (kernel_ulong_t)(info) }
+
+static const struct pci_device_id gna_pci_ids[] = {
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_CNL, &cnl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_EHL, &ehl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_GLK, &glk_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_ICL, &icl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_JSL, &jsl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_TGL, &tgl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_RKL, &rkl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_ADL, &adl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_RPL, &rpl_dev_info),
+ INTEL_GNA_DEVICE(GNA_DEV_HWID_MTL, &mtl_dev_info),
+ { }
+};
+
+static void gna_irq_vectors_fini(void *data)
+{
+ struct pci_dev *pcidev = data;
+
+ pci_free_irq_vectors(pcidev);
+}
+
+static int gna_irq_vectors_init(struct pci_dev *pcidev)
+{
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pcidev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_add_action(&pcidev->dev, gna_irq_vectors_fini, pcidev);
+ if (ret)
+ gna_irq_vectors_fini(pcidev);
+
+ return ret;
+}
+
+int gna_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *pci_id)
+{
+ struct gna_dev_info *dev_info;
+ void __iomem *iobase;
+ int irq;
+ int err;
+
+ err = pcim_enable_device(pcidev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pcidev, BIT(0), pci_name(pcidev));
+ if (err)
+ return err;
+
+ iobase = pcim_iomap_table(pcidev)[0];
+
+ pci_set_master(pcidev);
+
+ err = gna_irq_vectors_init(pcidev);
+ if (err < 0)
+ return err;
+
+ irq = pci_irq_vector(pcidev, 0);
+ if (irq < 0)
+ return irq;
+
+ dev_info = (struct gna_dev_info *)pci_id->driver_data;
+
+ err = gna_probe(&pcidev->dev, dev_info, iobase, irq);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static struct pci_driver gna_pci_driver = {
+ .name = DRIVER_NAME,
+ .id_table = gna_pci_ids,
+ .probe = gna_pci_probe,
+ .driver = {
+ .pm = &gna_pm,
+ },
+};
+
+module_pci_driver(gna_pci_driver);
+
+MODULE_DEVICE_TABLE(pci, gna_pci_ids);
diff --git a/drivers/gpu/drm/gna/gna_pci.h b/drivers/gpu/drm/gna/gna_pci.h
new file mode 100644
index 000000000000..b651fa2e6ea1
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_pci.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_PCI_H__
+#define __GNA_PCI_H__
+
+struct pci_device_id;
+struct pci_dev;
+
+int gna_pci_probe(struct pci_dev *dev, const struct pci_device_id *id);
+
+#endif /* __GNA_PCI_H__ */
diff --git a/drivers/gpu/drm/gna/gna_request.c b/drivers/gpu/drm/gna/gna_request.c
new file mode 100644
index 000000000000..1d9572bedcc2
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_request.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/timekeeping.h>
+#include <linux/uaccess.h>
+
+#include "gna_device.h"
+#include "gna_hw.h"
+#include "gna_mem.h"
+#include "gna_request.h"
+#include "gna_score.h"
+
+int gna_validate_score_config(struct gna_compute_cfg *compute_cfg,
+ struct gna_device *gna_priv)
+{
+ size_t buffers_size;
+
+ if (compute_cfg->gna_mode > GNA_MODE_XNN) {
+ dev_dbg(gna_dev(gna_priv), "invalid mode: %d\n", compute_cfg->gna_mode);
+ return -EINVAL;
+ }
+
+ if (compute_cfg->layer_count > gna_priv->info.max_layer_count) {
+ dev_dbg(gna_dev(gna_priv), "max layer count exceeded: %u > %u\n",
+ compute_cfg->layer_count, gna_priv->info.max_layer_count);
+ return -EINVAL;
+ }
+
+ if (compute_cfg->buffer_count == 0) {
+ dev_dbg(gna_dev(gna_priv), "no buffers\n");
+ return -EINVAL;
+ }
+
+ buffers_size = sizeof(struct gna_buffer) * compute_cfg->buffer_count;
+ if (!access_ok(u64_to_user_ptr(compute_cfg->buffers_ptr), buffers_size))
+ return -EACCES;
+
+ return 0;
+}
+
+static void gna_request_update_status(struct gna_request *score_request)
+{
+ struct gna_device *gna_priv = to_gna_device(score_request->drm_f->minor->dev);
+ /* The gna_priv's hw_status should be updated first */
+ u32 hw_status = gna_priv->hw_status;
+ u32 stall_cycles;
+ u32 total_cycles;
+
+ /* Technically, the time stamp can be a bit later than
+ * when the hw actually completed scoring. Here we just
+ * do our best in a deferred work, unless we want to
+ * tax isr for a more accurate record.
+ */
+ score_request->drv_perf.hw_completed = ktime_get_ns();
+
+ score_request->hw_status = hw_status;
+
+ score_request->status = gna_parse_hw_status(gna_priv, hw_status);
+
+ if (gna_hw_perf_enabled(gna_priv)) {
+ if (hw_status & GNA_STS_STATISTICS_VALID) {
+ total_cycles = gna_reg_read(gna_priv, GNA_MMIO_PTC);
+ stall_cycles = gna_reg_read(gna_priv, GNA_MMIO_PSC);
+ score_request->hw_perf.total = total_cycles;
+ score_request->hw_perf.stall = stall_cycles;
+ } else
+ dev_warn(gna_dev(gna_priv), "GNA statistics missing\n");
+ }
+ if (unlikely(hw_status & GNA_ERROR))
+ gna_print_error_status(gna_priv, hw_status);
+}
+
+static void gna_request_make_zombie(struct gna_request *score_request)
+{
+ int i;
+
+ for (i = 0; i < score_request->buffer_count; i++) {
+ kvfree((void *)(uintptr_t)score_request->buffer_list[i].gna.patches_ptr);
+ drm_gem_object_put(&score_request->buffer_list[i].gem->base.base);
+ }
+ kvfree(score_request->buffer_list);
+ score_request->buffer_list = NULL;
+ score_request->buffer_count = 0;
+}
+
+static void gna_request_process(struct work_struct *work)
+{
+ struct gna_buffer_with_object *buffer;
+ struct gna_request *score_request;
+ struct gna_device *gna_priv;
+ unsigned long hw_timeout;
+ int ret;
+ u64 i;
+
+ score_request = container_of(work, struct gna_request, work);
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev);
+
+ score_request->state = ACTIVE;
+
+ score_request->drv_perf.pre_processing = ktime_get_ns();
+
+ ret = pm_runtime_get_sync(gna_dev(gna_priv));
+ if (ret < 0 && ret != -EACCES) {
+ dev_warn(gna_dev(gna_priv), "pm_runtime_get_sync() failed: %d\n", ret);
+ score_request->status = -ENODEV;
+ pm_runtime_put_noidle(gna_dev(gna_priv));
+ goto tail;
+ }
+
+ /* Set busy flag before kicking off HW. The isr will clear it and wake us up. There is
+ * no difference if isr is missed in a timeout situation of the last request. We just
+ * always set it busy and let the wait_event_timeout check the reset.
+ * wq: X -> true
+ * isr: X -> false
+ */
+ gna_priv->dev_busy = true;
+
+ ret = gna_score(score_request);
+ if (ret) {
+ if (pm_runtime_put(gna_dev(gna_priv)) < 0)
+ dev_warn(gna_dev(gna_priv), "pm_runtime_put() failed: %d\n", ret);
+ score_request->status = ret;
+ goto tail;
+ }
+
+ score_request->drv_perf.processing = ktime_get_ns();
+
+ hw_timeout = gna_priv->recovery_timeout_jiffies;
+
+ hw_timeout = wait_event_timeout(gna_priv->dev_busy_waitq,
+ !gna_priv->dev_busy, hw_timeout);
+
+ if (!hw_timeout)
+ dev_warn(gna_dev(gna_priv), "hardware timeout occurred\n");
+
+ gna_priv->hw_status = gna_reg_read(gna_priv, GNA_MMIO_STS);
+
+ gna_request_update_status(score_request);
+
+ ret = gna_abort_hw(gna_priv);
+ if (ret < 0 && score_request->status == 0)
+ score_request->status = ret; // -ETIMEDOUT
+
+ ret = pm_runtime_put(gna_dev(gna_priv));
+ if (ret < 0)
+ dev_warn(gna_dev(gna_priv), "pm_runtime_put() failed: %d\n", ret);
+
+ gna_mmu_clear(gna_priv);
+
+ for (i = 0, buffer = score_request->buffer_list; i < score_request->buffer_count; i++, buffer++)
+ gna_gem_object_put_pages_sgt(buffer->gem);
+
+tail:
+ score_request->drv_perf.completion = ktime_get_ns();
+ score_request->state = DONE;
+ gna_request_make_zombie(score_request);
+
+ atomic_dec(&gna_priv->enqueued_requests);
+ wake_up_interruptible_all(&score_request->waitq);
+}
+
+static struct gna_request *gna_request_create(struct drm_file *file,
+ struct gna_compute_cfg *compute_cfg)
+{
+
+ struct gna_device *gna_priv = file->driver_priv;
+ struct gna_request *score_request;
+
+ if (IS_ERR(gna_priv))
+ return NULL;
+
+ score_request = kzalloc(sizeof(*score_request), GFP_KERNEL);
+ if (!score_request)
+ return NULL;
+ kref_init(&score_request->refcount);
+
+ dev_dbg(gna_dev(gna_priv), "labase: %d, lacount: %d\n",
+ compute_cfg->layer_base, compute_cfg->layer_count);
+
+ score_request->request_id = atomic_inc_return(&gna_priv->request_count);
+ score_request->compute_cfg = *compute_cfg;
+ score_request->drm_f = file;
+ score_request->state = NEW;
+ init_waitqueue_head(&score_request->waitq);
+ INIT_WORK(&score_request->work, gna_request_process);
+ INIT_LIST_HEAD(&score_request->node);
+
+ return score_request;
+}
+
+/*
+ * returns true if [inner_offset, inner_size) is embraced by [0, outer_size). False otherwise.
+ */
+static bool gna_validate_ranges(u64 outer_size, u64 inner_offset, u64 inner_size)
+{
+ return inner_offset < outer_size &&
+ inner_size <= (outer_size - inner_offset);
+}
+
+static int gna_validate_patches(struct gna_device *gna_priv, __u64 buffer_size,
+ struct gna_memory_patch *patches, u64 count)
+{
+ u64 idx;
+
+ for (idx = 0; idx < count; ++idx) {
+ if (patches[idx].size > 8) {
+ dev_dbg(gna_dev(gna_priv), "invalid patch size: %llu\n", patches[idx].size);
+ return -EINVAL;
+ }
+
+ if (!gna_validate_ranges(buffer_size, patches[idx].offset, patches[idx].size)) {
+ dev_dbg(gna_dev(gna_priv),
+ "patch out of bounds. buffer size: %llu, patch offset/size:%llu/%llu\n",
+ buffer_size, patches[idx].offset, patches[idx].size);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int gna_buffer_fill_patches(struct gna_buffer *buffer, struct gna_device *gna_priv)
+{
+ __u64 patches_user = buffer->patches_ptr;
+ struct gna_memory_patch *patches;
+ /* At this point, the buffer points to a memory region in kernel space where the copied
+ * patches_ptr also lives, but the value of it is still an address from user space. This
+ * function will set patches_ptr to either an address in kernel space or null before it
+ * exits.
+ */
+ u64 patch_count;
+ int ret;
+
+ buffer->patches_ptr = 0;
+ patch_count = buffer->patch_count;
+ if (!patch_count)
+ return 0;
+
+ patches = kvmalloc_array(patch_count, sizeof(struct gna_memory_patch), GFP_KERNEL);
+ if (!patches)
+ return -ENOMEM;
+
+ if (copy_from_user(patches, u64_to_user_ptr(patches_user),
+ sizeof(struct gna_memory_patch) * patch_count)) {
+ ret = -EFAULT;
+ goto err_fill_patches;
+ }
+
+ ret = gna_validate_patches(gna_priv, buffer->size, patches, patch_count);
+ if (ret) {
+ dev_dbg(gna_dev(gna_priv), "buffer %p: patches' validation failed\n", buffer);
+ goto err_fill_patches;
+ }
+
+ buffer->patches_ptr = (uintptr_t)patches;
+
+ return 0;
+
+err_fill_patches:
+ kvfree(patches);
+ return ret;
+}
+
+static int gna_request_fill_buffers(struct gna_request *score_request,
+ struct gna_compute_cfg *compute_cfg)
+{
+ struct gna_buffer_with_object *buffer_list;
+ struct gna_buffer_with_object *buffer;
+ struct gna_buffer *cfg_buffers;
+ struct drm_gem_object *drmgemo;
+ struct gna_device *gna_priv;
+ u64 buffers_total_size = 0;
+ size_t gem_obj_size;
+ u64 buffer_count;
+ u32 handle;
+ u64 i, j;
+ int ret;
+
+
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev);
+
+ buffer_count = compute_cfg->buffer_count;
+ buffer_list = kvmalloc_array(buffer_count, sizeof(*buffer_list), GFP_KERNEL);
+ if (!buffer_list)
+ return -ENOMEM;
+
+ cfg_buffers = u64_to_user_ptr(compute_cfg->buffers_ptr);
+ for (i = 0; i < buffer_count; ++i) {
+ if (copy_from_user(&buffer_list[i].gna, cfg_buffers+i,
+ sizeof(*cfg_buffers))) {
+ ret = -EFAULT;
+ goto err_free_buffers;
+ }
+ buffer_list[i].gem = NULL;
+ }
+
+ for (i = 0; i < buffer_count; i++) {
+ buffer = &buffer_list[i];
+ handle = buffer->gna.handle;
+
+ if (buffer->gna.offset != 0) {
+ dev_dbg(gna_dev(gna_priv), "buffer->offset = %llu for handle %u in score config\n",
+ buffer->gna.offset, buffer->gna.handle);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < i; j++) {
+ if (buffer_list[j].gna.handle == handle) {
+ dev_dbg(gna_dev(gna_priv),
+ "doubled memory id in score config; id:%u\n", handle);
+ ret = -EINVAL;
+ goto err_zero_patch_user_ptr;
+ }
+ }
+
+ buffers_total_size +=
+ round_up(buffer->gna.size, PAGE_SIZE);
+ if (buffers_total_size > gna_priv->info.max_hw_mem) {
+ dev_dbg(gna_dev(gna_priv), "buffers' %p total size too big\n", buffer);
+ ret = -EINVAL;
+ goto err_zero_patch_user_ptr;
+ }
+
+ drmgemo = drm_gem_object_lookup(score_request->drm_f, handle);
+
+ if (!drmgemo) {
+ dev_dbg(gna_dev(gna_priv), "memory object %u not found\n", handle);
+ ret = -EINVAL;
+ goto err_zero_patch_user_ptr;
+ }
+
+ // We are still in syscall context, but a prior request is already enqueued.
+ // A request may slip into the queue while some gna_gem_object is being deleted;
+ // this is a corner case and does little harm.
+ buffer->gem = to_gna_gem_obj(to_drm_gem_shmem_obj(drmgemo));
+
+ gem_obj_size = drmgemo->size;
+
+ if (!gna_validate_ranges(gem_obj_size, 0, buffer->gna.size)) {
+ dev_dbg(gna_dev(gna_priv),
+ "buffer out of bounds. mo size: %zu, buffer size:%llu\n",
+ gem_obj_size, buffer->gna.size);
+ ret = -EINVAL;
+ goto err_zero_patch_user_ptr;
+ }
+
+ ret = gna_buffer_fill_patches(&buffer->gna, gna_priv);
+ if (ret)
+ goto err_free_patches;
+ }
+
+ score_request->buffer_list = buffer_list;
+ score_request->buffer_count = buffer_count;
+
+ return 0;
+
+err_zero_patch_user_ptr:
+ /* patches_ptr may still hold an address in userspace.
+ * Don't pass it to kvfree().
+ */
+ buffer->gna.patches_ptr = 0;
+
+err_free_patches:
+ /* patches_ptr of each processed buffer should be either
+ * null or pointing to an allocated memory block in the
+ * kernel at this point.
+ */
+ for (j = 0; j <= i; j++) {
+ kvfree((void *)(uintptr_t)buffer_list[j].gna.patches_ptr);
+ drm_gem_object_put(&buffer_list[j].gem->base.base);
+ }
+
+err_free_buffers:
+ kvfree(buffer_list);
+ return ret;
+}
+
+int gna_enqueue_request(struct gna_compute_cfg *compute_cfg,
+ struct drm_file *file, u64 *request_id)
+{
+ bool is_qos = !!(compute_cfg->flags & GNA_FLAG_SCORE_QOS);
+ struct gna_device *gna_priv = file->driver_priv;
+ struct gna_request *score_request;
+ u64 pos_in_queue;
+ int ret;
+
+ pos_in_queue = atomic_inc_return(&gna_priv->enqueued_requests);
+ if (is_qos && pos_in_queue != 1) {
+ ret = -EBUSY;
+ goto ERR_UNQUEUE_REQUEST;
+ }
+
+ score_request = gna_request_create(file, compute_cfg);
+ if (!score_request) {
+ ret = -ENOMEM;
+ goto ERR_UNQUEUE_REQUEST;
+ }
+
+ ret = gna_request_fill_buffers(score_request, compute_cfg);
+ if (ret) {
+ kref_put(&score_request->refcount, gna_request_release);
+ goto ERR_UNQUEUE_REQUEST;
+ }
+
+ kref_get(&score_request->refcount);
+ mutex_lock(&gna_priv->reqlist_lock);
+ list_add_tail(&score_request->node, &gna_priv->request_list);
+ mutex_unlock(&gna_priv->reqlist_lock);
+
+ queue_work(gna_priv->request_wq, &score_request->work);
+ kref_put(&score_request->refcount, gna_request_release);
+
+ *request_id = score_request->request_id;
+
+ return 0;
+
+ERR_UNQUEUE_REQUEST:
+ atomic_dec(&gna_priv->enqueued_requests);
+ return ret;
+}
+
+void gna_request_release(struct kref *ref)
+{
+ struct gna_request *score_request =
+ container_of(ref, struct gna_request, refcount);
+ gna_request_make_zombie(score_request);
+ wake_up_interruptible_all(&score_request->waitq);
+ kfree(score_request);
+}
diff --git a/drivers/gpu/drm/gna/gna_request.h b/drivers/gpu/drm/gna/gna_request.h
new file mode 100644
index 000000000000..d056e70fb369
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_request.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_REQUEST_H__
+#define __GNA_REQUEST_H__
+
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include <uapi/drm/gna_drm.h>
+
+struct gna_device;
+struct gna_gem_object;
+struct drm_file;
+
+enum gna_request_state {
+ NEW,
+ ACTIVE,
+ DONE,
+};
+
+struct gna_buffer_with_object {
+ struct gna_buffer gna;
+ struct gna_gem_object *gem;
+};
+
+struct gna_request {
+ u64 request_id;
+
+ struct kref refcount;
+
+ struct drm_file *drm_f;
+
+ u32 hw_status;
+
+ enum gna_request_state state;
+
+ int status;
+
+ struct gna_hw_perf hw_perf;
+ struct gna_drv_perf drv_perf;
+
+ struct list_head node;
+
+ struct gna_compute_cfg compute_cfg;
+
+ struct gna_buffer_with_object *buffer_list;
+ u64 buffer_count;
+
+ struct work_struct work;
+ struct wait_queue_head waitq;
+};
+
+int gna_validate_score_config(struct gna_compute_cfg *compute_cfg,
+ struct gna_device *gna_priv);
+
+int gna_enqueue_request(struct gna_compute_cfg *compute_cfg,
+ struct drm_file *file, u64 *request_id);
+
+void gna_request_release(struct kref *ref);
+
+#endif // __GNA_REQUEST_H__
diff --git a/drivers/gpu/drm/gna/gna_score.c b/drivers/gpu/drm/gna/gna_score.c
new file mode 100644
index 000000000000..529270657a83
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_score.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2017-2022 Intel Corporation
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+
+#include <linux/dma-buf.h>
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <uapi/drm/gna_drm.h>
+
+#include "../drm_internal.h"
+
+#include "gna_device.h"
+#include "gna_gem.h"
+#include "gna_hw.h"
+#include "gna_mem.h"
+#include "gna_request.h"
+#include "gna_score.h"
+
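+ /* Write a 1/2/4/8-byte patch value at patch->offset in the mapped buffer. */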
+static int gna_do_patch_memory(struct gna_device *gna_priv,
+ struct gna_memory_patch *patch, void *vaddr)
+{
+ size_t size;
+ void *dest;
+ u64 value;
+
+ value = patch->value;
+ size = patch->size;
+ dest = (u8 *)vaddr + patch->offset;
+
+ switch (size) {
+ case 0:
+ return -EFAULT;
+ case sizeof(u8):
+ *((u8 *)dest) = (u8)value;
+ break;
+ case sizeof(u16):
+ *((u16 *)dest) = (u16)value;
+ break;
+ case sizeof(u32):
+ *((u32 *)dest) = (u32)value;
+ break;
+ case sizeof(u64):
+ *((u64 *)dest) = (u64)value;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
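+ /*
+ * Pin the buffer's pages, map it, apply all user-supplied patches and
+ * finally add the buffer to the GNA MMU.
+ */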
+static int gna_patch_memory(struct gna_device *gna_priv, struct gna_buffer_with_object *buffer)
+{
+ struct drm_gem_shmem_object *drmshmemo = &buffer->gem->base;
+ struct gna_gem_object *gnagemo = buffer->gem;
+ struct gna_buffer *gnab = &buffer->gna;
+ struct gna_memory_patch *patch;
+ struct iosys_map vmap;
+ struct sg_table *sgt;
+ int ret = 0;
+ u32 i;
+
+ dev_dbg(gna_dev(gna_priv), "handle: %u, patch_count, %llu\n",
+ gnab->handle, gnab->patch_count);
+
+ sgt = drm_gem_shmem_get_pages_sgt(drmshmemo);
+
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto err;
+ }
+
+ if (gnab->patch_count) {
+ ret = drm_gem_vmap(&drmshmemo->base, &vmap);
+
+ if (ret)
+ goto err_pages_sgt;
+
+ patch = (struct gna_memory_patch *)(uintptr_t)gnab->patches_ptr;
+ for (i = 0; i < gnab->patch_count; i++, patch++) {
+ ret = gna_do_patch_memory(gna_priv, patch, vmap.vaddr);
+ if (ret)
+ break;
+ }
+
+ kvfree((void *)(uintptr_t)gnab->patches_ptr);
+ gnab->patches_ptr = 0;
+ drm_gem_vunmap(&drmshmemo->base, &vmap);
+ if (ret) // ret from gna_do_patch_memory
+ goto err_pages_sgt;
+ }
+
+ gna_mmu_add(gna_priv, drmshmemo);
+
+ return 0;
+
+err_pages_sgt:
+ gna_gem_object_put_pages_sgt(gnagemo);
+err:
+ return ret;
+}
+
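+ /*
+ * Buffers are laid out back to back at page-aligned offsets, in
+ * buffer_list order; find the one covering mmu_offset and report its
+ * starting offset via memory_offset.
+ */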
+static struct gna_buffer_with_object *gna_find_buffer(struct gna_buffer_with_object *buffer_list,
+ u32 buffer_count, u32 mmu_offset, u32 *memory_offset)
+{
+ struct gna_buffer_with_object *buffer;
+ u32 memory_size;
+ u32 offset;
+ u32 i;
+
+ offset = 0;
+ for (i = 0; i < buffer_count; i++) {
+ buffer = buffer_list + i;
+ memory_size = round_up(buffer->gna.size, PAGE_SIZE);
+ if (mmu_offset < offset + memory_size) {
+ *memory_offset = offset;
+ return buffer;
+ }
+ offset += memory_size;
+ }
+
+ return NULL;
+}
+
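+ /*
+ * GMM mode: the layer descriptor lives in one of the user buffers at
+ * mmu_offset; copy it into the hardware descriptor's xnn_config slot.
+ */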
+static int gna_copy_gmm_config(struct gna_device *gna_priv,
+ struct gna_buffer_with_object *buffer_list,
+ u32 buffer_count, u32 mmu_offset)
+{
+ struct gna_buffer_with_object *buffer;
+ struct gna_hw_descriptor *hwdesc;
+ struct drm_gem_object *drmgemo;
+ struct gna_mmu_object *mmu;
+ struct iosys_map vmap;
+ u32 memory_offset;
+ u8 *gmm_desc;
+ int ret = 0;
+
+ mmu = &gna_priv->mmu;
+ hwdesc = mmu->hwdesc;
+
+ buffer = gna_find_buffer(buffer_list, buffer_count, mmu_offset, &memory_offset);
+ if (!buffer)
+ return -EINVAL;
+
+ drmgemo = &buffer->gem->base.base;
+
+ ret = drm_gem_vmap(drmgemo, &vmap);
+ if (ret)
+ return ret;
+
+ gmm_desc = (u8 *)vmap.vaddr + (mmu_offset - memory_offset);
+ memcpy(&hwdesc->xnn_config, gmm_desc, sizeof(struct gna_xnn_descriptor));
+ drm_gem_vunmap(drmgemo, &vmap);
+
+ return 0;
+}
+
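+ /*
+ * Prepare and start one computation: patch every buffer, program the
+ * layer (or GMM) descriptor, point the device at it and kick off scoring.
+ */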
+int gna_score(struct gna_request *score_request)
+{
+ struct gna_buffer_with_object *buffer;
+ struct gna_xnn_descriptor *xnn_config;
+ struct gna_compute_cfg *compute_cfg;
+ struct gna_device *gna_priv;
+ struct gna_mmu_object *mmu;
+ u64 buffer_count;
+ u32 desc_base;
+ int ret;
+ u64 i;
+
+ ret = 0;
+
+ gna_priv = to_gna_device(score_request->drm_f->minor->dev);
+
+ mmu = &gna_priv->mmu;
+ xnn_config = &mmu->hwdesc->xnn_config;
+ compute_cfg = &score_request->compute_cfg;
+
+ buffer_count = score_request->buffer_count;
+
+ for (i = 0, buffer = score_request->buffer_list; i < buffer_count; i++, buffer++) {
+ ret = gna_patch_memory(gna_priv, buffer);
+ if (ret)
+ goto err;
+ }
+
+ switch (compute_cfg->gna_mode) {
+ case GNA_MODE_XNN:
+ dev_dbg(gna_dev(gna_priv), "xNN mode; labase: %d, lacount: %d\n",
+ compute_cfg->layer_base, compute_cfg->layer_count);
+ xnn_config->labase = compute_cfg->layer_base;
+ xnn_config->lacount = compute_cfg->layer_count;
+ break;
+ case GNA_MODE_GMM:
+ dev_dbg(gna_dev(gna_priv), "GMM mode; offset: %d\n", compute_cfg->layer_base);
+ ret = gna_copy_gmm_config(gna_priv, score_request->buffer_list,
+ buffer_count, compute_cfg->layer_base);
+ if (ret)
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+
+ desc_base = (u32)(mmu->hwdesc_dma >> PAGE_SHIFT);
+ gna_reg_write(gna_priv, GNA_MMIO_DESBASE, desc_base);
+
+ gna_start_scoring(gna_priv, compute_cfg);
+
+err:
+ return ret;
+}
diff --git a/drivers/gpu/drm/gna/gna_score.h b/drivers/gpu/drm/gna/gna_score.h
new file mode 100644
index 000000000000..5b154d3623e0
--- /dev/null
+++ b/drivers/gpu/drm/gna/gna_score.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef __GNA_SCORE_H__
+#define __GNA_SCORE_H__
+
+struct gna_request;
+
+int gna_score(struct gna_request *score_request);
+
+#endif // __GNA_SCORE_H__
diff --git a/include/uapi/drm/gna_drm.h b/include/uapi/drm/gna_drm.h
new file mode 100644
index 000000000000..677343d88987
--- /dev/null
+++ b/include/uapi/drm/gna_drm.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/* Copyright(c) 2017-2022 Intel Corporation */
+
+#ifndef _GNA_DRM_H_
+#define _GNA_DRM_H_
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+#include "drm.h"
+
+#define GNA_DDI_VERSION_3 3
+
+/* Operation modes */
+#define GNA_MODE_GMM 0
+#define GNA_MODE_XNN 1
+
+#define GNA_PARAM_RECOVERY_TIMEOUT 1
+#define GNA_PARAM_DEVICE_TYPE 2
+#define GNA_PARAM_INPUT_BUFFER_S 3
+#define GNA_PARAM_DDI_VERSION 4
+
+#define GNA_STS_SCORE_COMPLETED _BITUL(0)
+#define GNA_STS_STATISTICS_VALID _BITUL(3)
+#define GNA_STS_PCI_MMU_ERR _BITUL(4)
+#define GNA_STS_PCI_DMA_ERR _BITUL(5)
+#define GNA_STS_PCI_UNEXCOMPL_ERR _BITUL(6)
+#define GNA_STS_VA_OOR _BITUL(7)
+#define GNA_STS_PARAM_OOR _BITUL(8)
+#define GNA_STS_SATURATE _BITUL(17)
+
+#define GNA_ERROR \
+ (GNA_STS_PCI_DMA_ERR |\
+ GNA_STS_PCI_MMU_ERR |\
+ GNA_STS_PCI_UNEXCOMPL_ERR |\
+ GNA_STS_PARAM_OOR |\
+ GNA_STS_VA_OOR)
+
+#define GNA_DEV_TYPE_0_9 0x09
+#define GNA_DEV_TYPE_1_0 0x10
+#define GNA_DEV_TYPE_2_0 0x20
+#define GNA_DEV_TYPE_3_0 0x30
+#define GNA_DEV_TYPE_3_5 0x35
+
+#define GNA_FLAG_SCORE_QOS _BITUL(0)
+
+/*
+ * Describes a part of a buffer to be overwritten before GNA computation starts
+ */
+struct gna_memory_patch {
+ /* offset from targeted memory */
+ __u64 offset;
+
+ __u64 size;
+ __u64 value;
+};
+
+struct gna_buffer {
+ __u32 handle;
+ __u32 pad;
+
+ __u64 offset;
+ __u64 size;
+
+ __u64 patch_count;
+ __u64 patches_ptr;
+};
+
+/*
+ * Driver performance timestamps in nanoseconds.
+ * Values are relative to system boot time but exclude time spent in suspend.
+ */
+struct gna_drv_perf {
+ __u64 pre_processing; /* driver starts pre-processing */
+ __u64 processing; /* hw starts processing */
+ __u64 hw_completed; /* hw finishes processing */
+ __u64 completion; /* driver finishes post-processing */
+};
+
+struct gna_hw_perf {
+ __u64 total;
+ __u64 stall;
+};
+
+struct gna_compute_cfg {
+ __u32 layer_base;
+ __u32 layer_count;
+
+ /* List of GNA memory buffers */
+ __u64 buffers_ptr;
+ __u64 buffer_count;
+
+ __u8 active_list_on;
+ __u8 gna_mode;
+ __u8 hw_perf_encoding;
+ __u8 flags;
+
+ __u8 pad[4];
+};
+
+typedef __u64 gna_param_id;
+
+union gna_parameter {
+ struct {
+ gna_param_id id;
+ } in;
+
+ struct {
+ __u64 value;
+ } out;
+};
+
+union gna_compute {
+ struct {
+ struct gna_compute_cfg config;
+ } in;
+
+ struct {
+ __u64 request_id;
+ } out;
+};
+
+union gna_wait {
+ struct {
+ __u64 request_id;
+ __u32 timeout;
+ __u32 pad;
+ } in;
+
+ struct {
+ __u32 hw_status;
+ __u32 pad;
+ struct gna_drv_perf drv_perf;
+ struct gna_hw_perf hw_perf;
+ } out;
+};
+
+struct gna_mem_id {
+ __u32 handle;
+ __u32 pad;
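+ /* fake offset to use with mmap(2) on the DRM fd to map the object */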
+ __u64 vma_fake_offset;
+ __u64 size_granted;
+};
+
+union gna_gem_new {
+ struct {
+ __u64 size;
+ } in;
+
+ struct gna_mem_id out;
+};
+
+struct gna_gem_free {
+ __u32 handle;
+};
+
+#define DRM_GNA_GET_PARAMETER 0x00
+#define DRM_GNA_GEM_NEW 0x01
+#define DRM_GNA_GEM_FREE 0x02
+#define DRM_GNA_COMPUTE 0x03
+#define DRM_GNA_WAIT 0x04
+
+#define DRM_IOCTL_GNA_GET_PARAMETER DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GET_PARAMETER, union gna_parameter)
+#define DRM_IOCTL_GNA_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_NEW, union gna_gem_new)
+#define DRM_IOCTL_GNA_GEM_FREE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_FREE, struct gna_gem_free)
+#define DRM_IOCTL_GNA_COMPUTE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_COMPUTE, union gna_compute)
+#define DRM_IOCTL_GNA_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_WAIT, union gna_wait)
+
+#endif /* _GNA_DRM_H_ */
--
2.44.0
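
For reference (not part of the patch): a minimal userspace sketch of how a client might drive the uAPI declared in gna_drm.h above. The device node path, the millisecond timeout unit and the placeholder buffer size are assumptions for illustration only; a real client would populate the buffer with model data and supply patches.

/* Hypothetical example, assuming the header is installed as <drm/gna_drm.h>. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <drm/gna_drm.h>

int main(void)
{
	/* Assumed render node; real code would enumerate /dev/dri/renderD*. */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0)
		return 1;

	/* Allocate a shmem-backed GEM object for model and I/O data. */
	union gna_gem_new mem = { .in = { .size = 4096 } };
	if (ioctl(fd, DRM_IOCTL_GNA_GEM_NEW, &mem))
		goto out;

	/* Describe the buffer; patches are omitted in this sketch. */
	struct gna_buffer buffer = {
		.handle = mem.out.handle,
		.size = mem.out.size_granted,
	};

	/* Submit a single-layer xNN computation over that buffer. */
	union gna_compute compute = {
		.in = { .config = {
			.buffers_ptr = (__u64)(uintptr_t)&buffer,
			.buffer_count = 1,
			.gna_mode = GNA_MODE_XNN,
			.layer_count = 1,
		} },
	};
	if (ioctl(fd, DRM_IOCTL_GNA_COMPUTE, &compute))
		goto out_free;

	/* Wait for completion; timeout unit assumed to be milliseconds. */
	union gna_wait wait_args = {
		.in = { .request_id = compute.out.request_id, .timeout = 1000 },
	};
	if (ioctl(fd, DRM_IOCTL_GNA_WAIT, &wait_args) == 0 &&
	    (wait_args.out.hw_status & GNA_STS_SCORE_COMPLETED))
		printf("request %llu completed\n",
		       (unsigned long long)compute.out.request_id);

out_free:
	ioctl(fd, DRM_IOCTL_GNA_GEM_FREE,
	      &(struct gna_gem_free){ .handle = mem.out.handle });
out:
	close(fd);
	return 0;
}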