From 2aff81e039de5b0b7ef6bdcb2c320f121f69e2b4 Mon Sep 17 00:00:00 2001 From: "Everest K.C." Date: Wed, 23 Oct 2024 17:33:55 -0600 Subject: [PATCH 001/882] drm/xe/guc: Fix dereference before NULL check The pointer list->list is dereferenced before the NULL check. Fix this by moving the NULL check outside the for loop, so that the check is performed before the dereferencing. The list->list pointer cannot be NULL so this has no effect on runtime. It's just a correctness issue. This issue was reported by Coverity Scan. https://scan7.scan.coverity.com/#/project-view/51525/11354?selectedIssue=1600335 Fixes: 0f1fdf559225 ("drm/xe/guc: Save manual engine capture into capture list") Signed-off-by: Everest K.C. Reviewed-by: Dan Carpenter Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20241023233356.5479-1-everestkc@everestkc.com.np --- drivers/gpu/drm/xe/xe_guc_capture.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 8b6cb786a2aaf..cc72446a5de11 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1531,7 +1531,7 @@ read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_gro { int i; - if (!list || list->num_regs == 0) + if (!list || !list->list || list->num_regs == 0) return; if (!regs) @@ -1541,9 +1541,6 @@ read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_gro struct __guc_mmio_reg_descr desc = list->list[i]; u32 value; - if (!list->list) - return; - if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) { value = xe_hw_engine_mmio_read32(hwe, desc.reg); } else { -- GitLab From 8262db9eff5816e757cbe5655728922784d8a802 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 29 Oct 2024 12:32:58 -0700 Subject: [PATCH 002/882] drm/xe: Move Wa 1607983814 to oob needs_wa_1607983814() predates wa_oob, so it was not being printed in /sys/kernel/debug/dri/0/*/workarounds. Port it to OOB rules. This makes the WA show up in debugfs. For TGL: OOB Workarounds 1607983814 22012773006 1409600907 Eventually the RTP infra may add support for writing registers in a loop, which would allow to keep track of the registers as well. But for now, just listing it as OOB workaround is already an improvement. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241029193258.749882-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ads.c | 11 ++--------- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 4e746ae98888f..943146e5b460d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -231,11 +231,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads) guc_ads_private_data_size(ads); } -static bool needs_wa_1607983814(struct xe_device *xe) -{ - return GRAPHICS_VERx100(xe) < 1250; -} - static size_t calculate_regset_size(struct xe_gt *gt) { struct xe_reg_sr_entry *sr_entry; @@ -250,7 +245,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (needs_wa_1607983814(gt_to_xe(gt))) + if (XE_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -709,7 +704,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, struct iosys_map *regset_map, struct xe_hw_engine *hwe) { - struct xe_device *xe = ads_to_xe(ads); struct xe_hw_engine *hwe_rcs_reset_domain = xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); struct xe_reg_sr_entry *entry; @@ -740,8 +734,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - /* Wa_1607983814 */ - if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index bcd04464b85e8..3ed12a85cc60f 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,3 +1,4 @@ +1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) -- GitLab From 23ea2c7572d4735ef66beb1e4feb8ae510b78247 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 8 Oct 2024 13:06:27 +0530 Subject: [PATCH 003/882] drm/xe: Set mask bits for CCS_MODE register CCS_MODE register requires setting mask bits from Xe2+ platforms. Set the mask bits unconditionally, as those bits are unused for older platforms. Signed-off-by: Balasubramani Vivekanandan Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241008073628.377433-2-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 42dc55cb23f4a..0c9e4b2fafab8 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -528,7 +528,7 @@ * [4-6] RSVD * [7] Disabled */ -#define CCS_MODE XE_REG(0x14804) +#define CCS_MODE XE_REG(0x14804, XE_REG_OPTION_MASKED) #define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ #define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ #define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 9360ac4de489e..246190b3e2bbd 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) } } + /* + * Mask bits need to be set for the register. Though only Xe2+ + * platforms require setting of mask bits, it won't harm for older + * platforms as these bits are unused there. + */ + mode |= CCS_MODE_CSLICE_0_3_MASK << 16; xe_mmio_write32(>->mmio, CCS_MODE, mode); xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", -- GitLab From 1c35f1ed1fe3c649f8c16214d0d3dd828b5265d9 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 8 Oct 2024 13:06:28 +0530 Subject: [PATCH 004/882] drm/xe: Use the filelist from drm for ccs_mode change Drop the exclusive client count tracking and use the filelist from the drm to track the active clients. This also ensures the clients created internally by the driver won't block changing the ccs mode. Fixes: ce8c161cbad4 ("drm/xe: Add ref counting for xe_file") Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241008073628.377433-3-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 10 ---------- drivers/gpu/drm/xe/xe_device_types.h | 9 --------- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 9 +++++---- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2da4affe4dfdc..8487745c8b8bd 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -88,10 +88,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) mutex_init(&xef->exec_queue.lock); xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); - spin_lock(&xe->clients.lock); - xe->clients.count++; - spin_unlock(&xe->clients.lock); - file->driver_priv = xef; kref_init(&xef->refcount); @@ -108,17 +104,12 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) static void xe_file_destroy(struct kref *ref) { struct xe_file *xef = container_of(ref, struct xe_file, refcount); - struct xe_device *xe = xef->xe; xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); - xe_drm_client_put(xef->client); kfree(xef->process_name); kfree(xef); @@ -334,7 +325,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.force_execlist = xe_modparam.force_execlist; spin_lock_init(&xe->irq.lock); - spin_lock_init(&xe->clients.lock); init_waitqueue_head(&xe->ufence_wq); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 85bede4dd6461..b9ea455d6f59f 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -379,15 +379,6 @@ struct xe_device { struct workqueue_struct *wq; } sriov; - /** @clients: drm clients info */ - struct { - /** @clients.lock: Protects drm clients info */ - spinlock_t lock; - - /** @clients.count: number of drm clients */ - u64 count; - } clients; - /** @usm: unified memory state */ struct { /** @usm.asid: convert a ASID to VM */ diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 246190b3e2bbd..b6adfb9f20306 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -139,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, } /* CCS mode can only be updated when there are no drm clients */ - spin_lock(&xe->clients.lock); - if (xe->clients.count) { - spin_unlock(&xe->clients.lock); + mutex_lock(&xe->drm.filelist_mutex); + if (!list_empty(&xe->drm.filelist)) { + mutex_unlock(&xe->drm.filelist_mutex); + xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n"); return -EBUSY; } @@ -152,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_reset_async(gt); } - spin_unlock(&xe->clients.lock); + mutex_unlock(&xe->drm.filelist_mutex); return count; } -- GitLab From cbe006a6492c01a0058912ae15d473f4c149896c Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:15 +0100 Subject: [PATCH 005/882] drm/xe: Move LNL scheduling WA to xe_device.h Move LNL scheduling WA to xe_device.h so this can be used in other places without needing keep the same comment about removal of this WA in the future. The WA, which flushes work or workqueues, is now wrapped in macros and can be reused wherever needed. Cc: Badal Nilawar Cc: Matthew Auld Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi cc: stable@vger.kernel.org # v6.11+ Suggested-by: John Harrison Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_guc_ct.c | 11 +---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 4c3f0ebe78a90..f1fbfe9168678 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -191,4 +191,18 @@ void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +/* + * Occasionally it is seen that the G2H worker starts running after a delay of more than + * a second even after being queued and activated by the Linux workqueue subsystem. This + * leads to G2H timeout error. The root cause of issue lies with scheduling latency of + * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS + * and this is beyond xe kmd. + * + * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. + */ +#define LNL_FLUSH_WORKQUEUE(wq__) \ + flush_workqueue(wq__) +#define LNL_FLUSH_WORK(wrk__) \ + flush_work(wrk__) + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 4870af1c5a900..8aeb1789805c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1018,17 +1018,8 @@ retry_same_fence: ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); - /* - * Occasionally it is seen that the G2H worker starts running after a delay of more than - * a second even after being queued and activated by the Linux workqueue subsystem. This - * leads to G2H timeout error. The root cause of issue lies with scheduling latency of - * Lunarlake Hybrid CPU. Issue dissappears if we disable Lunarlake atom cores from BIOS - * and this is beyond xe kmd. - * - * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. - */ if (!ret) { - flush_work(&ct->g2h_worker); + LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) { xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", g2h_fence.seqno, action[0]); -- GitLab From 38c4c8722bd74452280951edc44c23de47612001 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:16 +0100 Subject: [PATCH 006/882] drm/xe/ufence: Flush xe ordered_wq in case of ufence timeout Flush xe ordered_wq in case of ufence timeout which is observed on LNL and that points to recent scheduling issue with E-cores. This is similar to the recent fix: commit e51527233804 ("drm/xe/guc/ct: Flush g2h worker in case of g2h response timeout") and should be removed once there is a E-core scheduling fix for LNL. v2: Add platform check(Himal) s/__flush_workqueue/flush_workqueue(Jani) v3: Remove gfx platform check as the issue related to cpu platform(John) v4: Use the Common macro(John) and print when the flush resolves timeout(Matt B) Cc: Badal Nilawar Cc: Matthew Auld Cc: John Harrison Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: stable@vger.kernel.org # v6.11+ Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2754 Suggested-by: Matthew Brost Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index f5deb81eba01e..5b4264ea38bd2 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, } if (!timeout) { + LNL_FLUSH_WORKQUEUE(xe->ordered_wq); + err = do_compare(addr, args->value, args->mask, + args->op); + if (err <= 0) { + drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n"); + break; + } err = -ETIME; break; } -- GitLab From e1f6fa55664a0eeb0a641f497e1adfcf6672e995 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:17 +0100 Subject: [PATCH 007/882] drm/xe/guc/tlb: Flush g2h worker in case of tlb timeout Flush the g2h worker explicitly if TLB timeout happens which is observed on LNL and that points to the recent scheduling issue with E-cores on LNL. This is similar to the recent fix: commit e51527233804 ("drm/xe/guc/ct: Flush g2h worker in case of g2h response timeout") and should be removed once there is E core scheduling fix. v2: Add platform check(Himal) v3: Remove gfx platform check as the issue related to cpu platform(John) Use the common WA macro(John) and print when the flush resolves timeout(Matt B) v4: Remove the resolves log and do the flush before taking pending_lock(Matt A) Cc: Badal Nilawar Cc: Matthew Brost Cc: Matthew Auld Cc: John Harrison Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: stable@vger.kernel.org # v6.11+ Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2687 Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-3-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 773de1f08db9b..3cb228c773cd4 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -72,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; + LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); + spin_lock_irq(>->tlb_invalidation.pending_lock); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { -- GitLab From 6bd49cc1a8924c3fe9554526f2d42d8d8851aea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 31 Oct 2024 16:37:31 +0100 Subject: [PATCH 008/882] drm/xe: Avoid the OOM killer on buffer object memory allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than invoking the OOM killer on buffer object memory allocations and validations, have the allocations fail and pass the error to user-space if applicable. Cc: Maarten Lankhorst Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2701 Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241031153732.164995-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d5d30a0ff1e78..d14c0bfc70dd9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -876,6 +876,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) }; struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = true, }; struct ttm_resource *new_mem; int ret; @@ -937,6 +938,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) { struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = false, }; struct ttm_resource *new_mem; int ret; @@ -1099,7 +1101,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) { struct ttm_operation_ctx ctx = { - .interruptible = false + .interruptible = false, + .gfp_retry_mayfail = false, }; if (ttm_bo->ttm) { @@ -1294,6 +1297,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement *placement; uint32_t alignment; @@ -1897,6 +1901,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; if (vm) { @@ -2240,6 +2245,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; struct ttm_place requested; @@ -2290,6 +2296,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc) .interruptible = false, .no_wait_gpu = false, .force_alloc = force_alloc, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; int ret; -- GitLab From 1a7b71805a3051ae04dde1307a6eecedaca857b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 31 Oct 2024 16:37:32 +0100 Subject: [PATCH 009/882] drm/xe: Don't unnecessarily invoke the OOM killer on multiple binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple single-ioctl binds can be split up into multiple bind ioctls, reducing the memory required to hold the bind array. So rather than allowing the OOM killer to be invoked, return -ENOMEM or -ENOBUFS to user-space to take corrective action. v2: - Add __GFP_NOWARN to __GFP_RETRY_MAYFAIL allocations to avoid spamming the kernel log if a recoverable allocation fails. Cc: Maarten Lankhorst Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2701 Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241031153732.164995-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c99380271de62..624133fae5f55 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -733,7 +733,7 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) vops->pt_update_ops[i].ops = kmalloc_array(vops->pt_update_ops[i].num_ops, sizeof(*vops->pt_update_ops[i].ops), - GFP_KERNEL); + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!vops->pt_update_ops[i].ops) return array_of_binds ? -ENOBUFS : -ENOMEM; } @@ -2733,7 +2733,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, *bind_ops = kvmalloc_array(args->num_binds, sizeof(struct drm_xe_vm_bind_op), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!*bind_ops) return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; @@ -2973,14 +2974,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (args->num_binds) { bos = kvcalloc(args->num_binds, sizeof(*bos), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!bos) { err = -ENOMEM; goto release_vm_lock; } ops = kvcalloc(args->num_binds, sizeof(*ops), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; goto release_vm_lock; -- GitLab From a19d1db9a3fa89fabd7c83544b84f393ee9b851f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 31 Oct 2024 11:22:57 -0700 Subject: [PATCH 010/882] drm/xe: Restore system memory GGTT mappings GGTT mappings reside on the device and this state is lost during suspend / d3cold thus this state must be restored resume regardless if the BO is in system memory or VRAM. v2: - Unnecessary parentheses around bo->placements[0] (Checkpatch) Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241031182257.2949579-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 14 +++++++++++--- drivers/gpu/drm/xe/xe_bo_evict.c | 1 - 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d14c0bfc70dd9..92bf2715a76c5 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -889,8 +889,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (WARN_ON(!xe_bo_is_pinned(bo))) return -EINVAL; - if (WARN_ON(!xe_bo_is_vram(bo))) - return -EINVAL; + if (!xe_bo_is_vram(bo)) + return 0; ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx); if (ret) @@ -941,6 +941,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) .gfp_retry_mayfail = false, }; struct ttm_resource *new_mem; + struct ttm_place *place = &bo->placements[0]; int ret; xe_bo_assert_held(bo); @@ -954,6 +955,9 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) return -EINVAL; + if (!mem_type_is_vram(place->mem_type)) + return 0; + ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); if (ret) return ret; @@ -1808,7 +1812,10 @@ int xe_bo_pin(struct xe_bo *bo) place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); + } + if (mem_type_is_vram(place->mem_type) || + bo->flags & XE_BO_FLAG_GGTT) { spin_lock(&xe->pinned.lock); list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); spin_unlock(&xe->pinned.lock); @@ -1869,7 +1876,8 @@ void xe_bo_unpin(struct xe_bo *bo) bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); - if (mem_type_is_vram(place->mem_type)) { + if (mem_type_is_vram(place->mem_type) || + bo->flags & XE_BO_FLAG_GGTT) { spin_lock(&xe->pinned.lock); xe_assert(xe, !list_empty(&bo->pinned_link)); list_del_init(&bo->pinned_link); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 541b49007d738..32043e1e5a863 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -159,7 +159,6 @@ int xe_bo_restore_kernel(struct xe_device *xe) * should setup the iosys map. */ xe_assert(xe, !iosys_map_is_null(&bo->vmap)); - xe_assert(xe, xe_bo_is_vram(bo)); xe_bo_put(bo); -- GitLab From b7cfe79f06d673fccd388896ff67f305b8378716 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 3 Nov 2024 14:49:36 +0000 Subject: [PATCH 011/882] drm/i915/gt: Remove unused execlists_unwind_incomplete_requests execlists_unwind_incomplete_requests() is unused since 2021's commit eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://patchwork.freedesktop.org/patch/msgid/20241103144936.238116-1-linux@treblig.org Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 9 --------- 2 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 40269e4c1e312..325da0414d946 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 72090f52fb850..4a80ffa1b962e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { -- GitLab From be15f0bc4a95e681466b2cfa1ceb86a9e38c5be6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 10:31:04 -0800 Subject: [PATCH 012/882] drm/xe: Fix drm-next merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix merge in commit c787c2901e2c ("Merge drm/drm-next into drm-xe-next"). That workaround needs to be done only once. While at it, also remove undesired newline before checking for error. Fixes: c787c2901e2c ("Merge drm/drm-next into drm-xe-next") Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20241104183104.2265275-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ct.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index a405b9218ad2d..550eeed43903b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1017,7 +1017,6 @@ retry_same_fence: } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); - if (!ret) { LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) { @@ -1027,24 +1026,6 @@ retry_same_fence: } } - /* - * Occasionally it is seen that the G2H worker starts running after a delay of more than - * a second even after being queued and activated by the Linux workqueue subsystem. This - * leads to G2H timeout error. The root cause of issue lies with scheduling latency of - * Lunarlake Hybrid CPU. Issue dissappears if we disable Lunarlake atom cores from BIOS - * and this is beyond xe kmd. - * - * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. - */ - if (!ret) { - flush_work(&ct->g2h_worker); - if (g2h_fence.done) { - xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", - g2h_fence.seqno, action[0]); - ret = 1; - } - } - /* * Ensure we serialize with completion side to prevent UAF with fence going out of scope on * the stack, since we have no clue if it will fire after the timeout before we can erase -- GitLab From db62482e3242751aeb05c5995175795cc08605e6 Mon Sep 17 00:00:00 2001 From: Gyeyoung Baek Date: Sat, 2 Nov 2024 11:22:03 +0900 Subject: [PATCH 013/882] drm/xe: Fix build error for XE_IOCTL_DBG macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DRM_USE_DYNAMIC_DEBUG is set, 'drm_dbg' function is replaced with '__dynamic_func_call_cls', which is replaced with a do while statement. So in the previous code, there are the following build errors. include/linux/dynamic_debug.h:221:58: error: expected expression before ‘do’ 221 | #define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \ | ^~ include/linux/dynamic_debug.h:248:9: note: in expansion of macro ‘__dynamic_func_call_cls’ 248 | __dynamic_func_call_cls(__UNIQUE_ID(ddebug), cls, fmt, func, ##__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~~~~~ include/drm/drm_print.h:425:9: note: in expansion of macro ‘_dynamic_func_call_cls’ 425 | _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ | ^~~~~~~~~~~~~~~~~~~~~~ include/drm/drm_print.h:504:9: note: in expansion of macro ‘drm_dev_dbg’ 504 | drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~ include/drm/drm_print.h:522:33: note: in expansion of macro ‘drm_dbg_driver’ 522 | #define drm_dbg(drm, fmt, ...) drm_dbg_driver(drm, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~~~ drivers/gpu/drm/xe/xe_macros.h:14:21: note: in expansion of macro ‘drm_dbg’ 14 | ((cond) && (drm_dbg(&(xe)->drm, \ | ^~~~~~~ drivers/gpu/drm/xe/xe_bo.c:2029:13: note: in expansion of macro ‘XE_IOCTL_DBG’ 2029 | if (XE_IOCTL_DBG(xe, !gem_obj)) The problem is that XE_IOCTL_DBG uses this function for conditional expr. Use a statement expression so it can also be used on conditional checks. v2: Modify to print when only cond is true. v3: Modify to evaluate cond only once. Signed-off-by: Gyeyoung Baek Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241102022204.155039-1-gye976@gmail.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_macros.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index daf56c846d033..8a77c2423555f 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -10,9 +10,13 @@ #define XE_WARN_ON WARN_ON -#define XE_IOCTL_DBG(xe, cond) \ - ((cond) && (drm_dbg(&(xe)->drm, \ - "Ioctl argument check failed at %s:%d: %s", \ - __FILE__, __LINE__, #cond), 1)) +#define XE_IOCTL_DBG(xe, cond) ({ \ + int cond__ = !!(cond); \ + if (cond__) \ + drm_dbg(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond); \ + cond__; \ +}) #endif -- GitLab From aa06cb835153d79aa7c18eb9ffc70866acddaaad Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 2 Nov 2024 09:12:53 -0700 Subject: [PATCH 014/882] drm/xe: Improve devcoredump documentation Let the introduction be useful for both userspace and kernel. Also improve the formatting to wire up later to the documentation build. Reviewed-by: Raag Jadav Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241102161254.1818604-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_devcoredump.c | 51 +++++++++++++++++------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d2679c5d976b0..d3570d3d573c2 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -29,30 +29,39 @@ /** * DOC: Xe device coredump * - * Devices overview: * Xe uses dev_coredump infrastructure for exposing the crash errors in a - * standardized way. - * devcoredump exposes a temporary device under /sys/class/devcoredump/ - * which is linked with our card device directly. - * The core dump can be accessed either from - * /sys/class/drm/card/device/devcoredump/ or from - * /sys/class/devcoredump/devcd where - * /sys/class/devcoredump/devcd/failing_device is a link to - * /sys/class/drm/card/device/. + * standardized way. Once a crash occurs, devcoredump exposes a temporary + * node under ``/sys/class/devcoredump/devcd/``. The same node is also + * accessible in ``/sys/class/drm/card/device/devcoredump/``. The + * ``failing_device`` symlink points to the device that crashed and created the + * coredump. * - * Snapshot at hang: - * The 'data' file is printed with a drm_printer pointer at devcoredump read - * time. For this reason, we need to take snapshots from when the hang has - * happened, and not only when the user is reading the file. Otherwise the - * information is outdated since the resets might have happened in between. + * The following characteristics are observed by xe when creating a device + * coredump: * - * 'First' failure snapshot: - * In general, the first hang is the most critical one since the following hangs - * can be a consequence of the initial hang. For this reason we only take the - * snapshot of the 'first' failure and ignore subsequent calls of this function, - * at least while the coredump device is alive. Dev_coredump has a delayed work - * queue that will eventually delete the device and free all the dump - * information. + * **Snapshot at hang**: + * The 'data' file contains a snapshot of the HW and driver states at the time + * the hang happened. Due to the driver recovering from resets/crashes, it may + * not correspond to the state of the system when the file is read by + * userspace. + * + * **Coredump release**: + * After a coredump is generated, it stays in kernel memory until released by + * userpace by writing anything to it, or after an internal timer expires. The + * exact timeout may vary and should not be relied upon. Example to release + * a coredump: + * + * .. code-block:: shell + * + * $ > /sys/class/drm/card0/device/devcoredump/data + * + * **First failure only**: + * In general, the first hang is the most critical one since the following + * hangs can be a consequence of the initial hang. For this reason a snapshot + * is taken only for the first failure. Until the devcoredump is released by + * userspace or kernel, all subsequent hangs do not override the snapshot nor + * create new ones. Devcoredump has a delayed work queue that will eventually + * delete the file node and free all the dump information. */ #ifdef CONFIG_DEV_COREDUMP -- GitLab From a8f6035aebe768780abd730bd1ac61e460f43970 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 2 Nov 2024 09:12:54 -0700 Subject: [PATCH 015/882] drm/xe: Wire up devcoredump in documentation Add a documentation page to detail the device coredump as implemented by xe - it was documented in the source code, but not visible in the rendered (html) documentation. Reviewed-by: Matthew Brost Reviewed-by: Raag Jadav Link: https://patchwork.freedesktop.org/patch/msgid/20241102161254.1818604-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe_devcoredump.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 Documentation/gpu/xe/xe_devcoredump.rst diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 3f07aa3b54325..92cfb25e64d32 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -23,4 +23,5 @@ DG2, etc is provided to prototype the driver. xe_firmware xe_tile xe_debugging + xe_devcoredump xe-drm-usage-stats.rst diff --git a/Documentation/gpu/xe/xe_devcoredump.rst b/Documentation/gpu/xe/xe_devcoredump.rst new file mode 100644 index 0000000000000..ae4ec0e34dc01 --- /dev/null +++ b/Documentation/gpu/xe/xe_devcoredump.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Xe Device Coredump +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c + :doc: Xe device coredump + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c + :internal: -- GitLab From 71fb41bdd9bab7f541d81920367e2732ead7db8c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 23:15:39 -0800 Subject: [PATCH 016/882] drm/xe: Fix case for asserts in documentation The rendered html documentation for "Xe ASSERTs" doesn't look nice with the mixed caps and gives the impression it was a typo. Use Title Case Style. Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241105071539.2623727-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_assert.h | 8 ++++---- drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h | 2 +- drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 04d6b95c6d878..68fe70ce2be3b 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -14,7 +14,7 @@ #include "xe_step.h" /** - * DOC: Xe ASSERTs + * DOC: Xe Asserts * * While Xe driver aims to be simpler than legacy i915 driver it is still * complex enough that some changes introduced while adding new functionality @@ -103,7 +103,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") #define xe_assert_msg(xe, condition, msg, arg...) ({ \ @@ -138,7 +138,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") #define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ @@ -162,7 +162,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") #define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h index 0bf12d89ceb25..6af219d93c3b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h @@ -18,7 +18,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. */ #define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 7d156ba824791..dd1df950b021a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -20,7 +20,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. */ #define xe_sriov_pf_assert_vfid(xe, vfid) \ xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe)) -- GitLab From 07064a200b40ac2195cb6b7b779897d9377e5e6f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 4 Nov 2024 20:35:23 -0800 Subject: [PATCH 017/882] drm/xe: Fix possible exec queue leak in exec IOCTL In a couple of places after an exec queue is looked up the exec IOCTL returns on input errors without dropping the exec queue ref. Fix this ensuring the exec queue ref is dropped on input error. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Signed-off-by: Matthew Brost Reviewed-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241105043524.4062774-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f23ac1e2ed88c..6de12f91b865b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -132,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, args->num_batch_buffer && - q->width != args->num_batch_buffer)) - return -EINVAL; + q->width != args->num_batch_buffer)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; -- GitLab From 7d1a4258e602ffdce529f56686925034c1b3b095 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 4 Nov 2024 20:35:24 -0800 Subject: [PATCH 018/882] drm/xe: Drop VM dma-resv lock on xe_sync_in_fence_get failure in exec IOCTL Upon failure all locks need to be dropped before returning to the user. Fixes: 58480c1c912f ("drm/xe: Skip VMAs pin when requesting signal to the last XE_EXEC") Cc: Signed-off-by: Matthew Brost Reviewed-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241105043524.4062774-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 6de12f91b865b..756b492f13b03 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -224,6 +224,7 @@ retry: fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); if (IS_ERR(fence)) { err = PTR_ERR(fence); + xe_vm_unlock(vm); goto err_unlock_list; } for (i = 0; i < num_syncs; i++) -- GitLab From 43b1dd2b550f0861ce80fbfffd5881b1b26272b1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 4 Nov 2024 15:49:01 +0100 Subject: [PATCH 019/882] drm/xe/pf: Fix potential GGTT allocation leak In unlikely event that we fail during sending the new VF GGTT configuration to the GuC, we will free only the GGTT node data struct but will miss to release the actual GGTT allocation. This will later lead to list corruption, GGTT space leak and finally risking crash when unloading the driver: [ ] ... [drm] GT0: PF: Failed to provision VF1 with 1073741824 (1.00 GiB) GGTT (-EIO) [ ] ... [drm] GT0: PF: VF1 provisioning remains at 0 (0 B) GGTT [ ] list_add corruption. next->prev should be prev (ffff88813cfcd628), but was 0000000000000000. (next=ffff88813cfe2028). [ ] RIP: 0010:__list_add_valid_or_report+0x6b/0xb0 [ ] Call Trace: [ ] drm_mm_insert_node_in_range+0x2c0/0x4e0 [ ] xe_ggtt_node_insert+0x46/0x70 [xe] [ ] pf_provision_vf_ggtt+0x7f5/0xa70 [xe] [ ] xe_gt_sriov_pf_config_set_ggtt+0x5e/0x770 [xe] [ ] ggtt_set+0x4b/0x70 [xe] [ ] simple_attr_write_xsigned.constprop.0.isra.0+0xb0/0x110 [ ] ... [drm] GT0: PF: Failed to provision VF1 with 1073741824 (1.00 GiB) GGTT (-ENOSPC) [ ] ... [drm] GT0: PF: VF1 provisioning remains at 0 (0 B) GGTT [ ] Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b7b: 0000 [#1] PREEMPT SMP NOPTI [ ] RIP: 0010:drm_mm_remove_node+0x1b7/0x390 [ ] Call Trace: [ ] [ ] ? die_addr+0x2e/0x80 [ ] ? exc_general_protection+0x1a1/0x3e0 [ ] ? asm_exc_general_protection+0x22/0x30 [ ] ? drm_mm_remove_node+0x1b7/0x390 [ ] ggtt_node_remove+0xa5/0xf0 [xe] [ ] xe_ggtt_node_remove+0x35/0x70 [xe] [ ] xe_ttm_bo_destroy+0x123/0x220 [xe] [ ] intel_user_framebuffer_destroy+0x44/0x70 [xe] [ ] intel_plane_destroy_state+0x3b/0xc0 [xe] [ ] drm_atomic_state_default_clear+0x1cd/0x2f0 [ ] intel_atomic_state_clear+0x9/0x20 [xe] [ ] __drm_atomic_state_free+0x1d/0xb0 Fix that by using pf_release_ggtt() on the error path, which now works regardless if the node has GGTT allocation or not. Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241104144901.1903-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 062a0c2fd2cdf..192643d63d224 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -395,6 +395,8 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ xe_ggtt_node_remove(node, false); + } else { + xe_ggtt_node_fini(node); } } @@ -450,7 +452,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) config->ggtt_region = node; return 0; err: - xe_ggtt_node_fini(node); + pf_release_ggtt(tile, node); return err; } -- GitLab From c62018a002dd5da0262e005a89fe691ca8d57cf6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 13:35:09 -0800 Subject: [PATCH 020/882] drm/i915/pmu: Rename cpuhp_slot to cpuhp_state Both the documentation and most of other users call the return of cpuhp_setup_state_multi() as "state". Follow that. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241104213512.2314930-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/i915_pmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 93fbf53578da2..8706957ddc0a3 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1218,7 +1218,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; +static enum cpuhp_state cpuhp_state = CPUHP_INVALID; int i915_pmu_init(void) { @@ -1232,28 +1232,28 @@ int i915_pmu_init(void) pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n", ret); else - cpuhp_slot = ret; + cpuhp_state = ret; return 0; } void i915_pmu_exit(void) { - if (cpuhp_slot != CPUHP_INVALID) - cpuhp_remove_multi_state(cpuhp_slot); + if (cpuhp_state != CPUHP_INVALID) + cpuhp_remove_multi_state(cpuhp_state); } static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { - if (cpuhp_slot == CPUHP_INVALID) + if (cpuhp_state == CPUHP_INVALID) return -EINVAL; - return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node); + return cpuhp_state_add_instance(cpuhp_state, &pmu->cpuhp.node); } static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node); + cpuhp_state_remove_instance(cpuhp_state, &pmu->cpuhp.node); } void i915_pmu_register(struct drm_i915_private *i915) -- GitLab From 9116b5760e615336b0c5060a85b25b2ec7d7c48b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 13:35:10 -0800 Subject: [PATCH 021/882] drm/i915/pmu: Stop setting event_init to NULL Setting event_init to NULL is mostly done to detect when the driver is partially working: i915 probed, but pmu is not registered. However, checking for event_init is odd as it was supposed to always be set and kernel/events/ would just crash if it found it set to NULL. Since there's already a "closed" boolean, use that instead and extend it's meaning to unregistered/unregistering. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241104213512.2314930-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/i915_pmu.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8706957ddc0a3..86faa5705fd8f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -302,7 +302,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (!pmu->base.event_init) + if (pmu->closed) return; spin_lock_irq(&pmu->lock); @@ -324,7 +324,7 @@ void i915_pmu_gt_unparked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (!pmu->base.event_init) + if (pmu->closed) return; spin_lock_irq(&pmu->lock); @@ -1177,8 +1177,6 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - GEM_BUG_ON(!pmu->base.event_init); - /* Select the first online CPU as a designated reader. */ if (cpumask_empty(&i915_pmu_cpumask)) cpumask_set_cpu(cpu, &i915_pmu_cpumask); @@ -1191,8 +1189,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target = i915_pmu_target_cpu; - GEM_BUG_ON(!pmu->base.event_init); - /* * Unregistering an instance generates a CPU offline event which we must * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask. @@ -1265,9 +1261,10 @@ void i915_pmu_register(struct drm_i915_private *i915) &i915_pmu_cpumask_attr_group, NULL }; - int ret = -ENOMEM; + pmu->closed = true; + spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; @@ -1316,6 +1313,8 @@ void i915_pmu_register(struct drm_i915_private *i915) if (ret) goto err_unreg; + pmu->closed = false; + return; err_unreg: @@ -1323,7 +1322,6 @@ err_unreg: err_groups: kfree(pmu->base.attr_groups); err_attr: - pmu->base.event_init = NULL; free_event_attributes(pmu); err_name: if (IS_DGFX(i915)) @@ -1336,9 +1334,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - if (!pmu->base.event_init) - return; - /* * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu * ensures all currently executing ones will have exited before we @@ -1352,7 +1347,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915) i915_pmu_unregister_cpuhp_state(pmu); perf_pmu_unregister(&pmu->base); - pmu->base.event_init = NULL; kfree(pmu->base.attr_groups); if (IS_DGFX(i915)) kfree(pmu->name); -- GitLab From 6ba29f1352482b815e2414b718bbd6de8d884d10 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 13:35:11 -0800 Subject: [PATCH 022/882] drm/i915/pmu: Replace closed with registered Since i915 calls perf_pmu_register/perf_pmu_unregister, let's call the variable "registered" so we can flip the logic and rely on it being false by default. Looking at other drivers, it's also more common. Examples: arch/x86/events/intel/uncore.c and drivers/powercap/intel_rapl_common.c. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241104213512.2314930-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/i915_pmu.c | 25 +++++++++++++------------ drivers/gpu/drm/i915/i915_pmu.h | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 86faa5705fd8f..bd476297ed0a1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -302,7 +302,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (pmu->closed) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -324,7 +324,7 @@ void i915_pmu_gt_unparked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (pmu->closed) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -626,7 +626,7 @@ static int i915_pmu_event_init(struct perf_event *event) struct drm_i915_private *i915 = pmu_to_i915(pmu); int ret; - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (event->attr.type != event->pmu->type) @@ -724,7 +724,7 @@ static void i915_pmu_event_read(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 prev, new; - if (pmu->closed) { + if (!pmu->registered) { event->hw.state = PERF_HES_STOPPED; return; } @@ -850,7 +850,7 @@ static void i915_pmu_event_start(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return; i915_pmu_enable(event); @@ -861,7 +861,7 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) goto out; if (flags & PERF_EF_UPDATE) @@ -877,7 +877,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (flags & PERF_EF_START) @@ -1193,7 +1193,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) * Unregistering an instance generates a CPU offline event which we must * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask. */ - if (pmu->closed) + if (!pmu->registered) return 0; if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { @@ -1263,8 +1263,6 @@ void i915_pmu_register(struct drm_i915_private *i915) }; int ret = -ENOMEM; - pmu->closed = true; - spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; @@ -1313,7 +1311,7 @@ void i915_pmu_register(struct drm_i915_private *i915) if (ret) goto err_unreg; - pmu->closed = false; + pmu->registered = true; return; @@ -1334,12 +1332,15 @@ void i915_pmu_unregister(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; + if (!pmu->registered) + return; + /* * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu * ensures all currently executing ones will have exited before we * proceed with unregistration. */ - pmu->closed = true; + pmu->registered = false; synchronize_rcu(); hrtimer_cancel(&pmu->timer); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 41af038c37388..8e66d63d0c9f9 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -68,9 +68,9 @@ struct i915_pmu { */ struct pmu base; /** - * @closed: i915 is unregistering. + * @registered: PMU is registered and not in the unregistering process. */ - bool closed; + bool registered; /** * @name: Name as registered with perf core. */ -- GitLab From 79367b7a58c82d0b1c0a7b0ef748f7aafa91d048 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 13:35:12 -0800 Subject: [PATCH 023/882] drm/i915/pmu: Remove pointless synchronize_rcu() call This is already done inside perf_pmu_unregister() - no need to do it before. Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241104213512.2314930-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/i915_pmu.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index bd476297ed0a1..e55db036be1bb 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1335,13 +1335,8 @@ void i915_pmu_unregister(struct drm_i915_private *i915) if (!pmu->registered) return; - /* - * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu - * ensures all currently executing ones will have exited before we - * proceed with unregistration. - */ + /* Disconnect the PMU callbacks */ pmu->registered = false; - synchronize_rcu(); hrtimer_cancel(&pmu->timer); -- GitLab From a7238ee33c409e8edea365cc9e6539ed31a5c859 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 06:38:11 -0800 Subject: [PATCH 024/882] drm/xe: Add trace to lrc timestamp update Help debugging when LRC timestamp is updated for a exec queue. Reviewed-by: Jonathan Cavitt Reviewed-by: Nirmoy Das Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241104143815.2112272-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_lrc.c | 3 ++ drivers/gpu/drm/xe/xe_trace_lrc.c | 9 ++++++ drivers/gpu/drm/xe/xe_trace_lrc.h | 52 +++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_trace_lrc.c create mode 100644 drivers/gpu/drm/xe/xe_trace_lrc.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index da80c29aa3633..8f81e1c9d0182 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -101,6 +101,7 @@ xe-y += xe_bb.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ + xe_trace_lrc.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 4f64c7f4e68de..4b65da77c6e09 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -25,6 +25,7 @@ #include "xe_map.h" #include "xe_memirq.h" #include "xe_sriov.h" +#include "xe_trace_lrc.h" #include "xe_vm.h" #include "xe_wa.h" @@ -1758,5 +1759,7 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + trace_xe_lrc_update_timestamp(lrc, *old_ts); + return lrc->ctx_timestamp; } diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.c b/drivers/gpu/drm/xe/xe_trace_lrc.c new file mode 100644 index 0000000000000..ab9b7e2970bc2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_lrc.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h new file mode 100644 index 0000000000000..5c669a0b21808 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_LRC_H_ + +#include +#include + +#include "xe_gt_types.h" +#include "xe_lrc.h" +#include "xe_lrc_types.h" + +#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) + +TRACE_EVENT(xe_lrc_update_timestamp, + TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_ARGS(lrc, old), + TP_STRUCT__entry( + __field(struct xe_lrc *, lrc) + __field(u32, old) + __field(u32, new) + __string(name, lrc->fence_ctx.name) + __string(device_id, __dev_name_lrc(lrc)) + ), + + TP_fast_assign( + __entry->lrc = lrc; + __entry->old = old; + __entry->new = lrc->ctx_timestamp; + __assign_str(name); + __assign_str(device_id); + ), + TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + __entry->lrc, __get_str(name), + __entry->old, __entry->new, + __get_str(device_id)) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_lrc +#include -- GitLab From 83db047d9425d9a649f01573797558eff0f632e1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 06:38:12 -0800 Subject: [PATCH 025/882] drm/xe: Stop accumulating LRC timestamp on job_free The exec queue timestamp is only really useful when it's being queried through the fdinfo. There's no need to update it so often, on every job_free. Tracing a simple app like vkcube running shows an update rate of ~ 120Hz. In case of discrete, the BO is on vram, creating a lot of pcie transactions. The update on job_free() is used to cover a gap: if exec queue is created and destroyed rapidly, before a new query, the timestamp still needs to be accumulated and accounted for in the xef. Initial implementation in commit 6109f24f87d7 ("drm/xe: Add helper to accumulate exec queue runtime") couldn't do it on the exec_queue_fini since the xef could be gone at that point. However since commit ce8c161cbad4 ("drm/xe: Add ref counting for xe_file") the xef is refcounted and the exec queue always holds a reference, making this safe now. Improve the fix in commit 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") by reducing the frequency in which the update is needed. Fixes: 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") Reviewed-by: Nirmoy Das Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241104143815.2112272-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 6 ++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d098d2dd1b2d8..fd0f3b3c9101d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -260,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) { int i; + /* + * Before releasing our ref to lrc and xef, accumulate our run ticks + */ + xe_exec_queue_update_run_ticks(q); + for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); + __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 7afcc243037c8..9a8564ea06b59 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -747,8 +747,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); - xe_exec_queue_update_run_ticks(job->q); - trace_xe_sched_job_free(job); xe_sched_job_put(job); } -- GitLab From 20ade9c3f1958035306500e1ee0c7ee777ee8d42 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 06:38:13 -0800 Subject: [PATCH 026/882] drm/xe: Reword exec_queue and vm lock doc Reword documentation to possibly what it meant to be. Reviewed-by: Nirmoy Das Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241104143815.2112272-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b9ea455d6f59f..cb193234c7eca 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -588,7 +588,7 @@ struct xe_file { /** @vm.xe: xarray to store VMs */ struct xarray xa; /** - * @vm.lock: Protects VM lookup + reference and removal a from + * @vm.lock: Protects VM lookup + reference and removal from * file xarray. Not an intended to be an outer lock which does * thing while being held. */ @@ -601,8 +601,8 @@ struct xe_file { struct xarray xa; /** * @exec_queue.lock: Protects exec queue lookup + reference and - * removal a frommfile xarray. Not an intended to be an outer - * lock which does thing while being held. + * removal from file xarray. Not intended to be an outer lock + * which does things while being held. */ struct mutex lock; } exec_queue; -- GitLab From 6e6d7b41f9870cd464bed7632228b5f977e2c0b1 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 4 Nov 2024 22:34:45 +0100 Subject: [PATCH 027/882] drm/xe/vf: React to MIGRATED interrupt To properly support VF Save/Restore procedure, fixups need to be applied after PF driver finishes its part of VF Restore. The fixups are required to adjust the ongoing execution for a hardware switch that happened, because some GFX resources are not fully virtualized, and assigned to a VF as range from a global pool. The VF on which a VM is restored will often have different ranges provisioned than the VF on which save process happened. Those resource fixups are applied by the VF driver within a restored VM. A VF driver gets informed that it was migrated by receiving an interrupt from each GuC. The interrupt assigned for that purpose is "GUC SW interrupt 0". Seeing that fields set from within the irq handler should be the trigger for fixups. The VF can safely do post-migration fixups on resources associated to each GuC only after that GuC issued the MIGRATED interrupt. This change introduces a worker to be used for post-migration fixups, and a mechanism to schedule said worker when all GuCs sent the irq. v2: renamed and moved functions, updated logged messages, removed unused includes, used anon struct (Michal) v3: ordering, kerneldoc, asserts, debug messages, on_all_tiles -> on_all_gts (Michal) v4: fixed missing header include v5: Explained what fixups are, explained which IRQ is used, style fixes (Michal) Bspec: 50868 Signed-off-by: Tomasz Lis Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-2-tomasz.lis@intel.com Signed-off-by: Michal Wajdeczko --- drivers/gpu/drm/xe/Makefile | 3 +- drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 25 +++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 1 + drivers/gpu/drm/xe/xe_guc.c | 11 ++++ drivers/gpu/drm/xe/xe_memirq.c | 3 ++ drivers/gpu/drm/xe/xe_sriov.c | 4 ++ drivers/gpu/drm/xe/xe_sriov_types.h | 17 ++++++ drivers/gpu/drm/xe/xe_sriov_vf.c | 77 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_vf.h | 14 +++++ 10 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8f81e1c9d0182..95aabbb74a284 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -125,7 +125,8 @@ xe-y += \ xe_gt_sriov_vf.o \ xe_guc_relay.o \ xe_memirq.o \ - xe_sriov.o + xe_sriov.o \ + xe_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index cb193234c7eca..bccca63c8a48c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -374,6 +374,8 @@ struct xe_device { /** @sriov.pf: PF specific data */ struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index d3baba50f0851..0e330eb938227 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -27,6 +27,7 @@ #include "xe_guc_relay.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_vf.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -692,6 +693,30 @@ failed: return err; } +/** + * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, + * or just mark that a GuC is ready for it. + * @gt: the &xe_gt struct instance linked to target GuC + * + * This function shall be called only by VF. + */ +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); + /* + * We need to be certain that if all flags were set, at least one + * thread will notice that and schedule the recovery. + */ + smp_mb__after_atomic(); + + xe_gt_sriov_info(gt, "ready for recovery after migration\n"); + xe_sriov_vf_start_migration_recovery(xe); +} + static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e541ce57bec24..9959a296b221a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -17,6 +17,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7f704346a8f4d..7224593c9ce9b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1099,10 +1099,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) return guc_self_cfg(guc, key, 2, val); } +static void xe_guc_sw_0_irq_handler(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (IS_SRIOV_VF(gt_to_xe(gt))) + xe_gt_sriov_vf_migrated_event_handler(gt); +} + void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) { if (iir & GUC_INTR_GUC2HOST) xe_guc_ct_irq_handler(&guc->ct); + + if (iir & GUC_INTR_SW_INT_0) + xe_guc_sw_0_irq_handler(guc); } void xe_guc_sanitize(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index f833da88150a1..51dc90906003b 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -442,6 +442,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + + if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) + xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); } /** diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index ef10782af6560..04e2f539ccd94 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -14,6 +14,7 @@ #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_vf.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe) return err; } + if (IS_SRIOV_VF(xe)) + xe_sriov_vf_init_early(xe); + xe_assert(xe, !xe->sriov.wq); xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); if (!xe->sriov.wq) diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index c7b7ad4af5c8f..ca94382a721e5 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -9,6 +9,7 @@ #include #include #include +#include /** * VFID - Virtual Function Identifier @@ -56,4 +57,20 @@ struct xe_device_pf { struct mutex master_lock; }; +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c new file mode 100644 index 0000000000000..2fe49296a79d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_printk.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" + +static void migration_worker_func(struct work_struct *w); + +/** + * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. + * @xe: the &xe_device to initialize + */ +void xe_sriov_vf_init_early(struct xe_device *xe) +{ + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); +} + +static void vf_post_migration_recovery(struct xe_device *xe) +{ + drm_dbg(&xe->drm, "migration recovery in progress\n"); + /* FIXME: add the recovery steps */ + drm_notice(&xe->drm, "migration recovery ended\n"); +} + +static void migration_worker_func(struct work_struct *w) +{ + struct xe_device *xe = container_of(w, struct xe_device, + sriov.vf.migration.worker); + + vf_post_migration_recovery(xe); +} + +static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { + xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); + return false; + } + } + return true; +} + +/** + * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. + */ +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) +{ + bool started; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (!vf_ready_to_recovery_on_all_gts(xe)) + return; + + WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); + /* Ensure other threads see that no flags are set now. */ + smp_mb(); + + started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); + drm_info(&xe->drm, "VF migration recovery %s\n", started ? + "scheduled" : "already in progress"); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h new file mode 100644 index 0000000000000..7b8622cff2b79 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_H_ +#define _XE_SRIOV_VF_H_ + +struct xe_device; + +void xe_sriov_vf_init_early(struct xe_device *xe); +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); + +#endif -- GitLab From 360a1f3e96dc21794d688bb4f885542b2b14e619 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 4 Nov 2024 22:34:46 +0100 Subject: [PATCH 028/882] drm/xe/vf: Document SRIOV VF restore flow This adds a documentation chapter, containing high level flow of VF restore procedure. v2: Better describe initial conditions, include GuC states on sequence diagram (Michal) v3: moved DOC to .c (Michal) Signed-off-by: Tomasz Lis Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-3-tomasz.lis@intel.com Signed-off-by: Michal Wajdeczko --- drivers/gpu/drm/xe/xe_sriov_vf.c | 107 +++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 2fe49296a79d4..ab334cea2a1ce 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -12,6 +12,113 @@ #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +/** + * DOC: VF restore procedure in PF KMD and VF KMD + * + * Restoring previously saved state of a VF is one of core features of + * SR-IOV. All major VM Management applications allow saving and restoring + * the VM state, and doing that to a VM which uses SRIOV VF as one of + * the accessible devices requires support from KMD on both PF and VF side. + * VMM initiates all required operations through VFIO module, which then + * translates them into PF KMD calls. This description will focus on these + * calls, leaving out the module which initiates these steps (VFIO). + * + * In order to start the restore procedure, GuC needs to keep the VF in + * proper state. The PF driver can ensure GuC set it to VF_READY state + * by provisioning the VF, which in turn can be done after Function Level + * Reset of said VF (or after it was freshly created - in that case FLR + * is not needed). The FLR procedure ends with GuC sending message + * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. + * After the provisioning is completed, the VF needs to be paused, and + * at that point the actual restore can begin. + * + * During VF Restore, state of several resources is restored. These may + * include local memory content (system memory is restored by VMM itself), + * values of MMIO registers, stateless compression metadata and others. + * The final resource which also needs restoring is state of the VF + * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` + * message is used, with reference to the state blob to be consumed by + * GuC. + * + * Next, when VFIO is asked to set the VM into running state, the PF driver + * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this + * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the + * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform + * the VF KMD within the VM that it was migrated. + * + * As soon as Virtual GPU of the VM starts, the VF driver within receives + * the MIGRATED interrupt and schedules post-migration recovery worker. + * That worker queries GuC for new provisioning (using MMIO communication), + * and applies fixups to any non-virtualized resources used by the VF. + * + * When the VF driver is ready to continue operation on the newly connected + * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to + * enter the long awaited `VF_RUNNING` state, and therefore start handling + * CTB messages and scheduling workloads from the VF:: + * + * PF GuC VF + * [ ] | | + * [ ] PF2GUC_VF_CONTROL(pause) | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_READY_PAUSED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] success [ ] | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] PF loads resources from the | | + * [ ]------- saved image supplied | | + * [ ] | | | + * [ ] <----- | | + * [ ] | | + * [ ] GUC_PF_OPCODE_VF_RESTORE | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC loads contexts and CTB | + * [ ] [ ]------- state from image | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_PAUSED | + * [ ] [ ] | | + * [ ] success [ ] <----- | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] GUC_PF_TRIGGER_VF_RESUME | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_BLOCKED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GUC_INTR_SW_INT_0 | + * [ ] success [ ]---------------------------> [ ] + * [ ] <---------------------------[ ] [ ] + * | | VF2GUC_QUERY_SINGLE_KLV [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] new VF provisioning [ ] + * | [ ]---------------------------> [ ] + * | | [ ] + * | | VF driver applies post [ ] + * | | migration fixups -------[ ] + * | | | [ ] + * | | -----> [ ] + * | | [ ] + * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] GuC sets new VF state to [ ] + * | [ ]------- VF_RUNNING [ ] + * | [ ] | [ ] + * | [ ] <----- [ ] + * | [ ] success [ ] + * | [ ]---------------------------> [ ] + * | | | + * | | | + */ + static void migration_worker_func(struct work_struct *w); /** -- GitLab From 1255954d9fdc5538be90034357525ef85bdbcf29 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 4 Nov 2024 22:34:47 +0100 Subject: [PATCH 029/882] drm/xe/vf: Send RESFIX_DONE message at end of VF restore After restore, GuC will not answer to any messages from VF KMD until fixups are applied. When that is done, VF KMD sends RESFIX_DONE message to GuC, at which point GuC resumes normal operation. This patch implements sending the RESFIX_DONE message at end of post-migration recovery. v2: keep pm ref during whole recovery, style fixes (Michal) v3: assert removal to separate patch, debug message per GuC instead of one, comments changes (Michal) v4: improve one debug message (Michal) Signed-off-by: Tomasz Lis Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-4-tomasz.lis@intel.com Signed-off-by: Michal Wajdeczko --- .../gpu/drm/xe/abi/guc_actions_sriov_abi.h | 38 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 38 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 1 + drivers/gpu/drm/xe/xe_sriov_vf.c | 18 +++++++++ 4 files changed, 95 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index b6a1852749dd1..0b28659d94e9b 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -501,6 +501,44 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +/** + * DOC: VF2GUC_NOTIFY_RESFIX_DONE + * + * This action is used by VF to notify the GuC that the VF KMD has completed + * post-migration recovery steps. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u + +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + /** * DOC: VF2GUC_QUERY_SINGLE_KLV * diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 0e330eb938227..cca5d57328021 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -224,6 +224,44 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +/** + * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. + * @gt: the &xe_gt struct instance linked to target GuC + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_vf_notify_resfix_done(guc); + if (unlikely(err)) + xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", + ERR_PTR(err)); + else + xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); + + return err; +} + static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, u32 *value, u32 value_len) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 9959a296b221a..912d208142616 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -17,6 +17,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index ab334cea2a1ce..945893ad967f3 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -8,6 +8,8 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_vf.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" @@ -130,10 +132,26 @@ void xe_sriov_vf_init_early(struct xe_device *xe) INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); } +/* + * Notify all GuCs about resource fixups apply finished. + */ +static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + xe_gt_sriov_vf_notify_resfix_done(gt); + } +} + static void vf_post_migration_recovery(struct xe_device *xe) { drm_dbg(&xe->drm, "migration recovery in progress\n"); + xe_pm_runtime_get(xe); /* FIXME: add the recovery steps */ + vf_post_migration_notify_resfix_done(xe); + xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); } -- GitLab From 4be3fca2ce6e207802a4ee36882ececde152221f Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 4 Nov 2024 22:34:48 +0100 Subject: [PATCH 030/882] drm/xe/vf: Start post-migration fixups with provisioning query During post-migration recovery, only MMIO communication to GuC is allowed. The VF KMD needs to use that channel to ask for the new provisioning, which includes a new GGTT range assigned to the VF. v2: query config only instead of handshake; no need to get pm ref as it's now kept through whole recovery (Michal) v3: switched names of 'err' and 'ret' (Michal) Signed-off-by: Tomasz Lis Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-5-tomasz.lis@intel.com Signed-off-by: Michal Wajdeczko --- drivers/gpu/drm/xe/xe_sriov_vf.c | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 945893ad967f3..ad29bd835d8e1 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -132,6 +132,31 @@ void xe_sriov_vf_init_early(struct xe_device *xe) INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); } +/** + * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. + * @xe: the &xe_device struct instance + * + * After migration, we need to re-query all VF configuration to make sure + * they match previous provisioning. Note that most of VF provisioning + * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +static int vf_post_migration_requery_guc(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err, ret = 0; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_vf_query_config(gt); + ret = ret ?: err; + } + + return ret; +} + /* * Notify all GuCs about resource fixups apply finished. */ @@ -147,12 +172,23 @@ static void vf_post_migration_notify_resfix_done(struct xe_device *xe) static void vf_post_migration_recovery(struct xe_device *xe) { + int err; + drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); + err = vf_post_migration_requery_guc(xe); + if (unlikely(err)) + goto fail; + /* FIXME: add the recovery steps */ vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); + return; +fail: + xe_pm_runtime_put(xe); + drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); + xe_device_declare_wedged(xe); } static void migration_worker_func(struct work_struct *w) -- GitLab From abd2202047fc75d52dcd729d5a1534f019822e9c Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 4 Nov 2024 22:34:49 +0100 Subject: [PATCH 031/882] drm/xe/vf: Defer fixups if migrated twice fast If another VF migration happened during post-migration recovery, then the current worker should be finished to allow the next one start swiftly and cleanly. Check for defer in two places: before fixups, and before sending RESFIX_DONE. Signed-off-by: Tomasz Lis Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-6-tomasz.lis@intel.com Signed-off-by: Michal Wajdeczko --- drivers/gpu/drm/xe/xe_sriov_vf.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index ad29bd835d8e1..c1275e64aa9c6 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -157,6 +157,19 @@ static int vf_post_migration_requery_guc(struct xe_device *xe) return ret; } +/* + * vf_post_migration_imminent - Check if post-restore recovery is coming. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_post_migration_imminent(struct xe_device *xe) +{ + return xe->sriov.vf.migration.gt_flags != 0 || + work_pending(&xe->sriov.vf.migration.worker); +} + /* * Notify all GuCs about resource fixups apply finished. */ @@ -166,8 +179,14 @@ static void vf_post_migration_notify_resfix_done(struct xe_device *xe) unsigned int id; for_each_gt(gt, xe, id) { + if (vf_post_migration_imminent(xe)) + goto skip; xe_gt_sriov_vf_notify_resfix_done(gt); } + return; + +skip: + drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); } static void vf_post_migration_recovery(struct xe_device *xe) @@ -177,6 +196,8 @@ static void vf_post_migration_recovery(struct xe_device *xe) drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); err = vf_post_migration_requery_guc(xe); + if (vf_post_migration_imminent(xe)) + goto defer; if (unlikely(err)) goto fail; @@ -185,6 +206,10 @@ static void vf_post_migration_recovery(struct xe_device *xe) xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; +defer: + xe_pm_runtime_put(xe); + drm_dbg(&xe->drm, "migration recovery deferred\n"); + return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); -- GitLab From b939a08bc378a7c716ad7a9486b48794b95d22f5 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 4 Nov 2024 13:41:03 -0800 Subject: [PATCH 032/882] drm/i915/guc: Flush ct receive tasklet during reset preparation GuC to host communication is interrupt driven, the handling has 3 parts: interrupt context, tasklet and request queue worker. During GuC reset prepare, interrupt is disabled before destroy contexts steps start. The IRQ and worker are flushed to finish any outstanding in-progress message handling. But, the tasklet flush is missing, it might causes 2 race conditions: 1. Tasklet runs after IRQ flushed, add request to queue after worker flush started, causes unexpected G2H message request processing, meanwhile, reset prepare code already get the context destroyed. This will causes error reported about bad context state. (https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11349 and https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12303) 2. Tasklet runs after intel_guc_submission_reset_prepare, ct_try_receive_message start to run, while intel_uc_reset_prepare already finished guc sanitize and set ct->enable to false. This will causes warning on incorrect ct->enable state. (https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12439) Add the missing tasklet flush to flush all 3 parts. Signed-off-by: Zhanjun Dong Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20241104214103.214702-1-zhanjun.dong@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ed979847187f5..6a3d550f6ed15 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1688,6 +1688,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_lock_irq(guc_to_gt(guc)->irq_lock); spin_unlock_irq(guc_to_gt(guc)->irq_lock); + /* Flush tasklet */ + tasklet_disable(&guc->ct.receive_tasklet); + tasklet_enable(&guc->ct.receive_tasklet); + guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); flush_work(&guc->ct.requests.worker); -- GitLab From c4ed1bb128473cf250df89ad278b37922a810dd3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 5 Nov 2024 18:30:29 +0100 Subject: [PATCH 033/882] drm/xe/guc: Log content of the failed G2H message We are already logging an error once we failed to process a G2H message, but then it's quite hard to extract the content of the broken G2H message from the captured snapshot. Extend our error log with the raw hexdump of the G2H message. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241105173032.1947-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 550eeed43903b..63bd91963eb18 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1299,8 +1299,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) } if (ret) { - xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", - action, ERR_PTR(ret)); + xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n", + action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg); CT_DEAD(ct, NULL, PROCESS_FAILED); } -- GitLab From 94b585401b06e9b2d15ada54b64395c46d9f55c4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 5 Nov 2024 18:30:30 +0100 Subject: [PATCH 034/882] drm/xe/guc: Drop redundant logs about invalid G2H length We are now logging details of the failed G2H message (including its length) at the GuC CT component. Drop now redundant log from the GuC submit code. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241105173032.1947-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9a8564ea06b59..315cac196f26b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1882,15 +1882,12 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; u32 runnable_state = msg[1]; - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 2)) return -EPROTO; - } q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1924,14 +1921,11 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1953,14 +1947,11 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -2000,10 +1991,8 @@ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) { u32 status; - if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) { - xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len); + if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) return -EPROTO; - } status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) @@ -2018,14 +2007,11 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -2050,10 +2036,8 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le u8 guc_class, instance; u32 reason; - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len != 3)) return -EPROTO; - } guc_class = msg[0]; instance = msg[1]; -- GitLab From 6b5f15445c8d07945e6f209b404fb89968374e88 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 5 Nov 2024 18:30:31 +0100 Subject: [PATCH 035/882] drm/xe/guc: Don't read data from G2H prior to length check While highly unlikely, incoming G2H message might be too short so we shouldn't read any data from it prior to checking a length. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241105173032.1947-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 315cac196f26b..37d4ad8e4f5c8 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1883,12 +1883,14 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; + u32 guc_id, runnable_state; if (unlikely(len < 2)) return -EPROTO; + guc_id = msg[0]; + runnable_state = msg[1]; + q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) return -EPROTO; @@ -1922,11 +1924,13 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; if (unlikely(len < 1)) return -EPROTO; + guc_id = msg[0]; + q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) return -EPROTO; @@ -1948,11 +1952,13 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; if (unlikely(len < 1)) return -EPROTO; + guc_id = msg[0]; + q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) return -EPROTO; @@ -2008,11 +2014,13 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, { struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; if (unlikely(len < 1)) return -EPROTO; + guc_id = msg[0]; + q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) return -EPROTO; -- GitLab From 44e21ea6dcd5bc9cf7a1c4e3281779987c070373 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 5 Nov 2024 21:45:57 +0100 Subject: [PATCH 036/882] drm/xe/guc: Don't treat GuC generic CAT error as protocol error GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any context. We shouldn't treat that as G2H protocol error that would justify a GT reset, as it may happen due to some VF activity. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241105204557.1991-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_fwif.h | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 08ffe59f22fae..057153f89b30b 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -17,6 +17,7 @@ #define G2H_LEN_DW_TLB_INVALIDATE 3 #define GUC_ID_MAX 65535 +#define GUC_ID_UNKNOWN 0xffffffff #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 37d4ad8e4f5c8..9e0f86f3778b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2021,6 +2021,15 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, guc_id = msg[0]; + if (guc_id == GUC_ID_UNKNOWN) { + /* + * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF + * context. In such case only PF will be notified about that fault. + */ + xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); + return 0; + } + q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) return -EPROTO; -- GitLab From 9e7aacd8402b88394e6a83cb242901fde77a1773 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 6 Nov 2024 14:49:44 -0800 Subject: [PATCH 037/882] drm/xe: Ensure all locks released in exec IOCTL In couple of places the wrong error handling goto was used to release locks. Fix these to ensure all locks dropped on exec IOCTL errors. Cc: Francois Dugast Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission") Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20241106224944.30130-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 756b492f13b03..31cca938956fc 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -203,14 +203,14 @@ retry: write_locked = false; } if (err) - goto err_syncs; + goto err_hw_exec_mode; if (write_locked) { err = xe_vm_userptr_pin(vm); downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_hw_exec_mode; + goto err_unlock_list; } if (!args->num_batch_buffer) { -- GitLab From 5bd3521d257073818cec6c41359b886e1ba648d2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Nov 2024 16:12:57 +0100 Subject: [PATCH 038/882] drm/xe/guc: Add VF_CFG_SCHED_PRIORITY_KEY KLV definition This KLV allows to set the scheduling priority for each VF, also for the PF. Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://patchwork.freedesktop.org/patch/msgid/20241106151301.2079-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 37606cf8cc5e1..7dcb118e3d9fd 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -291,6 +291,14 @@ enum { * * :0: (default) * :1-65535: number of contexts (Gen12) + * + * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C + * This config controls VF’s scheduling priority. + * + * :0: LOW = schedule VF only if it has active work (default) + * :1: NORMAL = schedule VF always, irrespective of whether it has work or not + * :2: HIGH = schedule VF in the next time-slice after current active + * time-slice completes if it has active work */ #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 @@ -343,6 +351,12 @@ enum { #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u +#define GUC_SCHED_PRIORITY_LOW 0u +#define GUC_SCHED_PRIORITY_NORMAL 1u +#define GUC_SCHED_PRIORITY_HIGH 2u + /* * Workaround keys: */ -- GitLab From 5a814e3ade40cf5c26dd496ea39c033929889604 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Nov 2024 16:12:58 +0100 Subject: [PATCH 039/882] drm/xe/guc: Add VF_CFG_SCHED_PRIORITY to KLV helper Recognize new VF_CFG_SCHED_PRIORITY key in to_string() helper. Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://patchwork.freedesktop.org/patch/msgid/20241106151301.2079-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index 9d99fe266d97f..146a6eda9e064 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -49,6 +49,8 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_db_id"; case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: return "begin_ctx_id"; + case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY: + return "sched_priority"; /* VF CFG threshold keys */ #define define_threshold_key_to_string_case(TAG, NAME, ...) \ -- GitLab From 7dbed0fdb18ca10de6ef2e80b68fa764b250a1c1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Nov 2024 16:12:59 +0100 Subject: [PATCH 040/882] drm/xe/pf: Add functions to configure VF scheduling priority Add functions to configure PF or VF scheduling priority using the VF_CFG_SCHED_PRIORITY KLV. Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://patchwork.freedesktop.org/patch/msgid/20241106151301.2079-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 76 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 3 + .../gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 2 + 3 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 192643d63d224..8d84d2a9ab1ec 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -207,6 +207,11 @@ static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } +static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority); +} + static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) { return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); @@ -1767,6 +1772,77 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi return preempt_timeout; } +static const char *sched_priority_unit(u32 priority) +{ + return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" : + priority == GUC_SCHED_PRIORITY_NORMAL ? "(normal)" : + priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" : + "(?)"; +} + +static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_sched_priority(gt, vfid, priority); + if (unlikely(err)) + return err; + + config->sched_priority = priority; + return 0; +} + +static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->sched_priority; +} + +/** + * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @priority: requested scheduling priority + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sched_priority(gt, vfid, priority); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, priority, + xe_gt_sriov_pf_config_get_sched_priority(gt, vfid), + "scheduling priority", sched_priority_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) scheduling priority. + */ +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + u32 priority; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + priority = pf_get_sched_priority(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return priority; +} + static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) { lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 0c55aa40a1a7b..f894e9d4abba2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -44,6 +44,9 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); + u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, enum xe_guc_klv_threshold_index index); int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 2d3b73d78f147..686c7b3b6d7a5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -33,6 +33,8 @@ struct xe_gt_sriov_config { u32 exec_quantum; /** @preempt_timeout: preemption timeout in microseconds. */ u32 preempt_timeout; + /** @sched_priority: scheduling priority. */ + u32 sched_priority; /** @thresholds: GuC thresholds for adverse events notifications. */ u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; }; -- GitLab From 9d6aabe126b4a27e7b4e86ca1c1c568a7707fd2c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Nov 2024 16:13:00 +0100 Subject: [PATCH 041/882] drm/xe/pf: Allow to control scheduling priority using debugfs Add 'sched_priority' attribute that will represents the scheduling prority of the VF or PF. Values {0,1,2} correspond to priorities {LOW,NORMAL,HIGH} respectively. Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://patchwork.freedesktop.org/patch/msgid/20241106151301.2079-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 05df4ab3514ba..b2521dd6ec428 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -164,6 +164,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │   │   ├── contexts_spare * │   │   ├── exec_quantum_ms * │   │   ├── preempt_timeout_us + * │   │   ├── sched_priority * │   ├── vf1 * │   │   ├── ggtt_quota * │   │   ├── lmem_quota @@ -171,6 +172,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │   │   ├── contexts_quota * │   │   ├── exec_quantum_ms * │   │   ├── preempt_timeout_us + * │   │   ├── sched_priority */ #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ @@ -209,6 +211,7 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); /* * /sys/kernel/debug/dri/0/ @@ -295,6 +298,8 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &exec_quantum_fops); debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, &preempt_timeout_fops); + debugfs_create_file_unsafe("sched_priority", 0644, parent, parent, + &sched_priority_fops); /* register all threshold attributes */ #define register_threshold_attribute(TAG, NAME, ...) \ -- GitLab From f9b982fbb47be2e6aa824032fe07f5e4dece0019 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Nov 2024 16:13:01 +0100 Subject: [PATCH 042/882] drm/xe/pf: Adjust scheduling priority based on policy change Local values of scheduling priorities need to be adjusted when changing the schedule_if_idle policy as those are related. Disabling schedule_if_idle policy forces the low priority, and enabling schedule_if_idle policy forces the normal priority. Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://patchwork.freedesktop.org/patch/msgid/20241106151301.2079-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index fae5be5a2a11d..c00fb354705f4 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -135,14 +135,33 @@ static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 valu return 0; } +static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority) +{ + unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n < total_vfs; n++) + gt->sriov.pf.vfs[n].config.sched_priority = priority; +} + static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) { + int err; + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); - return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, - >->sriov.pf.policy.guc.sched_if_idle, - enable); + err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, + >->sriov.pf.policy.guc.sched_if_idle, + enable); + + if (!err) + pf_bulk_reset_sched_priority(gt, enable ? GUC_SCHED_PRIORITY_NORMAL : + GUC_SCHED_PRIORITY_LOW); + return err; } static int pf_reprovision_sched_if_idle(struct xe_gt *gt) -- GitLab From faf3cc68b9e43789f2c5debf0117a7d01e4a02bb Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 6 Nov 2024 22:06:06 -0800 Subject: [PATCH 043/882] drm/xe: Add gt_id to xe_sched_job traces In order to uniquely identify the jobs, xe_sched_job tracepoints need to have the tuple (gt_id, guc_id). Find a "hole" where the next entry is unaligned and add one more field. Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241107060606.3130885-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_trace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 91130ad8999cd..de682978c4bfb 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -211,6 +211,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __string(dev, __dev_name_eq(job->q)) __field(u32, seqno) __field(u32, lrc_seqno) + __field(u8, gt_id) __field(u16, guc_id) __field(u32, guc_state) __field(u32, flags) @@ -223,6 +224,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __assign_str(dev); __entry->seqno = xe_sched_job_seqno(job); __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); + __entry->gt_id = job->q->gt->info.id; __entry->guc_id = job->q->guc->id; __entry->guc_state = atomic_read(&job->q->guc->state); @@ -232,9 +234,9 @@ DECLARE_EVENT_CLASS(xe_sched_job, __entry->batch_addr = (u64)job->ptrs[0].batch_addr; ), - TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", __get_str(dev), __entry->fence, __entry->seqno, - __entry->lrc_seqno, __entry->guc_id, + __entry->lrc_seqno, __entry->gt_id, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) ); -- GitLab From f2a6b8e396666d97ada8e8759dfb6a69d8df6380 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 1 Nov 2024 17:01:57 +0000 Subject: [PATCH 044/882] drm/xe: improve hibernation on igpu The GGTT looks to be stored inside stolen memory on igpu which is not treated as normal RAM. The core kernel skips this memory range when creating the hibernation image, therefore when coming back from hibernation the GGTT programming is lost. This seems to cause issues with broken resume where GuC FW fails to load: [drm] *ERROR* GT0: load failed: status = 0x400000A0, time = 10ms, freq = 1250MHz (req 1300MHz), done = -1 [drm] *ERROR* GT0: load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01 [drm] *ERROR* GT0: firmware signature verification failed [drm] *ERROR* CRITICAL: Xe has declared device 0000:00:02.0 as wedged. Current GGTT users are kernel internal and tracked as pinned, so it should be possible to hook into the existing save/restore logic that we use for dgpu, where the actual evict is skipped but on restore we importantly restore the GGTT programming. This has been confirmed to fix hibernation on at least ADL and MTL, though likely all igpu platforms are affected. This also means we have a hole in our testing, where the existing s4 tests only really test the driver hooks, and don't go as far as actually rebooting and restoring from the hibernation image and in turn powering down RAM (and therefore losing the contents of stolen). v2 (Brost) - Remove extra newline and drop unnecessary parentheses. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3275 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241101170156.213490-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 37 ++++++++++++++------------------ drivers/gpu/drm/xe/xe_bo_evict.c | 6 ------ 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8286cbc237218..549866da5cd1d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -952,7 +952,10 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (WARN_ON(!xe_bo_is_pinned(bo))) return -EINVAL; - if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) + if (WARN_ON(xe_bo_is_vram(bo))) + return -EINVAL; + + if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo))) return -EINVAL; if (!mem_type_is_vram(place->mem_type)) @@ -1774,6 +1777,7 @@ int xe_bo_pin_external(struct xe_bo *bo) int xe_bo_pin(struct xe_bo *bo) { + struct ttm_place *place = &bo->placements[0]; struct xe_device *xe = xe_bo_device(bo); int err; @@ -1804,8 +1808,6 @@ int xe_bo_pin(struct xe_bo *bo) */ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { - struct ttm_place *place = &(bo->placements[0]); - if (mem_type_is_vram(place->mem_type)) { xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS); @@ -1813,13 +1815,12 @@ int xe_bo_pin(struct xe_bo *bo) vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); } + } - if (mem_type_is_vram(place->mem_type) || - bo->flags & XE_BO_FLAG_GGTT) { - spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); - spin_unlock(&xe->pinned.lock); - } + if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); } ttm_bo_pin(&bo->ttm); @@ -1867,24 +1868,18 @@ void xe_bo_unpin_external(struct xe_bo *bo) void xe_bo_unpin(struct xe_bo *bo) { + struct ttm_place *place = &bo->placements[0]; struct xe_device *xe = xe_bo_device(bo); xe_assert(xe, !bo->ttm.base.import_attach); xe_assert(xe, xe_bo_is_pinned(bo)); - if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { - struct ttm_place *place = &(bo->placements[0]); - - if (mem_type_is_vram(place->mem_type) || - bo->flags & XE_BO_FLAG_GGTT) { - spin_lock(&xe->pinned.lock); - xe_assert(xe, !list_empty(&bo->pinned_link)); - list_del_init(&bo->pinned_link); - spin_unlock(&xe->pinned.lock); - } + if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { + spin_lock(&xe->pinned.lock); + xe_assert(xe, !list_empty(&bo->pinned_link)); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); } - ttm_bo_unpin(&bo->ttm); } diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 32043e1e5a863..b01bc20eb90b0 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -34,9 +34,6 @@ int xe_bo_evict_all(struct xe_device *xe) u8 id; int ret; - if (!IS_DGFX(xe)) - return 0; - /* User memory */ for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { struct ttm_resource_manager *man = @@ -125,9 +122,6 @@ int xe_bo_restore_kernel(struct xe_device *xe) struct xe_bo *bo; int ret; - if (!IS_DGFX(xe)) - return 0; - spin_lock(&xe->pinned.lock); for (;;) { bo = list_first_entry_or_null(&xe->pinned.evicted, -- GitLab From 7abccdb4a4d774bf43f2c603cd9c1a27ea359c61 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 7 Nov 2024 20:47:40 +0100 Subject: [PATCH 045/882] drm/xe/guc: Prefer GT oriented asserts in submit code For better diagnostics, use xe_gt_assert() instead of xe_assert(). Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241107194741.2167-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 52 ++++++++++++------------------ 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9e0f86f3778b2..0311d9e50d066 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -412,12 +412,11 @@ static const int xe_exec_queue_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); @@ -451,12 +450,11 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_MLRC_REG_SIZE]; int len = 0; int i; - xe_assert(xe, xe_exec_queue_is_parallel(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -479,7 +477,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); + xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); @@ -513,7 +511,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct xe_lrc *lrc = q->lrc[0]; struct guc_ctxt_registration_info info; - xe_assert(xe, !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -603,7 +601,7 @@ static int wq_noop_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wq_space_until_wrap(q))) return -ENODEV; - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); + xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | @@ -643,13 +641,13 @@ static void wq_item_append(struct xe_exec_queue *q) wqi[i++] = lrc->ring.tail / sizeof(u64); } - xe_assert(xe, i == wqi_size / sizeof(u32)); + xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); + xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); xe_device_wmb(xe); @@ -661,7 +659,6 @@ static void wq_item_append(struct xe_exec_queue *q) static void submit_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_lrc *lrc = q->lrc[0]; u32 action[3]; u32 g2h_len = 0; @@ -669,7 +666,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) int len = 0; bool extra_submit = false; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); if (xe_exec_queue_is_parallel(q)) wq_item_append(q); @@ -716,12 +713,11 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct dma_fence *fence = NULL; bool lr = xe_exec_queue_is_lr(q); - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); + xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); @@ -867,7 +863,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_gpu_scheduler *sched = &ge->sched; bool wedged; - xe_assert(xe, xe_exec_queue_is_lr(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); @@ -1274,9 +1270,8 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); if (exec_queue_registered(q)) @@ -1312,11 +1307,10 @@ static void __suspend_fence_signal(struct xe_exec_queue *q) static void suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - xe_guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || + xe_guc_read_stopped(guc)); + xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); __suspend_fence_signal(q); } @@ -1412,12 +1406,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; int err, i; - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); + xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1630,9 +1623,8 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); xe_sched_msg_lock(sched); guc_exec_queue_try_add_msg(q, msg, RESUME); @@ -1744,9 +1736,8 @@ void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); @@ -1788,9 +1779,8 @@ int xe_guc_submit_start(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); @@ -1825,8 +1815,8 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) return NULL; } - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); + xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); + xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); return q; } -- GitLab From fb3cb67e68767463bd5bf31324b553fb49d9b6e3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 7 Nov 2024 20:47:41 +0100 Subject: [PATCH 046/882] drm/xe/guc: Prefer GT oriented logs in submit code For better diagnostics, use xe_gt_err() instead of drm_err(). Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241107194741.2167-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0311d9e50d066..c70b75a3eb85f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -763,7 +763,6 @@ static void disable_scheduling_deregister(struct xe_guc *guc, struct xe_exec_queue *q) { MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); int ret; set_min_preemption_timeout(guc, q); @@ -773,7 +772,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, if (!ret) { struct xe_gpu_scheduler *sched = &q->guc->sched; - drm_warn(&xe->drm, "Pending enable failed to respond"); + xe_gt_warn(q->gt, "Pending enable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); @@ -817,7 +816,7 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) */ void xe_guc_submit_wedge(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; int err; @@ -827,7 +826,8 @@ void xe_guc_submit_wedge(struct xe_guc *guc) err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); return; } @@ -859,7 +859,6 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) container_of(w, struct xe_guc_exec_queue, lr_tdr); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; bool wedged; @@ -897,7 +896,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) !exec_queue_pending_disable(q) || xe_guc_read_stopped(guc), HZ * 5); if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); + xe_gt_warn(q->gt, "Schedule disable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; @@ -1801,17 +1800,17 @@ int xe_guc_submit_start(struct xe_guc *guc) static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); return NULL; } q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); return NULL; } @@ -2039,7 +2038,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; @@ -2051,10 +2050,10 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le reason = msg[2]; /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); + xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); - xe_gt_reset_async(guc_to_gt(guc)); + xe_gt_reset_async(gt); return 0; } -- GitLab From f7278da76da2589af13bc8a487d0e8f8abdecc99 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 7 Nov 2024 16:13:57 +0100 Subject: [PATCH 047/882] drm/xe/guc: Do not assert CTB state while sending MMIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During VF post-migration recovery, MMIO communication channel to GuC is used, despite CTB channel being enabled. This behavior is rooted in the save-restore architecture specification. Therefore, a VF driver cannot assert that CTB is disabled while sending MMIO messages to GuC. Such assertion needs to be PF only, or be removed. This patch simply removes the assertion. Signed-off-by: Tomasz Lis Suggested-by: Michał Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20241107151357.1623733-2-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7224593c9ce9b..df1ba94cf4ca3 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -945,7 +945,6 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - xe_assert(xe, !xe_guc_ct_enabled(&guc->ct)); xe_assert(xe, len); xe_assert(xe, len <= VF_SW_FLAG_COUNT); xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); -- GitLab From 666e1960464140cc4bc9203c203097e70b54c95a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 5 Nov 2024 14:38:16 +0100 Subject: [PATCH 048/882] drm/rockchip: cdn-dp: Use drm_connector_helper_hpd_irq_event() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code for detecting and updating the connector status in cdn_dp_pd_event_work() has a number of problems. - It does not aquire the locks to call the detect helper and update the connector status. These are struct drm_mode_config.connection_mutex and struct drm_mode_config.mutex. - It does not use drm_helper_probe_detect(), which helps with the details of locking and detection. - It uses the connector's status field to determine a change to the connector status. The epoch_counter field is the correct one. The field signals a change even if the connector status' value did not change. Replace the code with a call to drm_connector_helper_hpd_irq_event(), which fixes all these problems. Signed-off-by: Thomas Zimmermann Fixes: 81632df69772 ("drm/rockchip: cdn-dp: do not use drm_helper_hpd_irq_event") Cc: Chris Zhong Cc: Guenter Roeck Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: dri-devel@lists.freedesktop.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Cc: # v4.11+ Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20241105133848.480407-1-tzimmermann@suse.de --- drivers/gpu/drm/rockchip/cdn-dp-core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index b04538907f956..f576b1aa86d14 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -947,9 +947,6 @@ static void cdn_dp_pd_event_work(struct work_struct *work) { struct cdn_dp_device *dp = container_of(work, struct cdn_dp_device, event_work); - struct drm_connector *connector = &dp->connector; - enum drm_connector_status old_status; - int ret; mutex_lock(&dp->lock); @@ -1009,11 +1006,7 @@ static void cdn_dp_pd_event_work(struct work_struct *work) out: mutex_unlock(&dp->lock); - - old_status = connector->status; - connector->status = connector->funcs->detect(connector, false); - if (old_status != connector->status) - drm_kms_helper_hotplug_event(dp->drm_dev); + drm_connector_helper_hpd_irq_event(&dp->connector); } static int cdn_dp_pd_event(struct notifier_block *nb, -- GitLab From 712ec5de382d009396ced509e75b392d28871aa4 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 28 Oct 2024 15:41:34 +0800 Subject: [PATCH 049/882] drm/rockchip: vop2: Don't spam logs in atomic update Demote the error message to drm_dbg_kms to only print the message if the respective debug messages are enabled. Signed-off-by: Andy Yan Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20241028074140.382199-1-andyshrk@163.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 22 +++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 9873172e3fd33..312ca816c6468 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1271,8 +1271,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, dsp_w = drm_rect_width(dest); if (dest->x1 + dsp_w > adjusted_mode->hdisplay) { - drm_err(vop2->drm, "vp%d %s dest->x1[%d] + dsp_w[%d] exceed mode hdisplay[%d]\n", - vp->id, win->data->name, dest->x1, dsp_w, adjusted_mode->hdisplay); + drm_dbg_kms(vop2->drm, + "vp%d %s dest->x1[%d] + dsp_w[%d] exceed mode hdisplay[%d]\n", + vp->id, win->data->name, dest->x1, dsp_w, adjusted_mode->hdisplay); dsp_w = adjusted_mode->hdisplay - dest->x1; if (dsp_w < 4) dsp_w = 4; @@ -1282,8 +1283,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, dsp_h = drm_rect_height(dest); if (dest->y1 + dsp_h > adjusted_mode->vdisplay) { - drm_err(vop2->drm, "vp%d %s dest->y1[%d] + dsp_h[%d] exceed mode vdisplay[%d]\n", - vp->id, win->data->name, dest->y1, dsp_h, adjusted_mode->vdisplay); + drm_dbg_kms(vop2->drm, + "vp%d %s dest->y1[%d] + dsp_h[%d] exceed mode vdisplay[%d]\n", + vp->id, win->data->name, dest->y1, dsp_h, adjusted_mode->vdisplay); dsp_h = adjusted_mode->vdisplay - dest->y1; if (dsp_h < 4) dsp_h = 4; @@ -1296,15 +1298,15 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, */ if (!(win->data->feature & WIN_FEATURE_AFBDC)) { if (actual_w > dsp_w && (actual_w & 0xf) == 1) { - drm_err(vop2->drm, "vp%d %s act_w[%d] MODE 16 == 1\n", - vp->id, win->data->name, actual_w); + drm_dbg_kms(vop2->drm, "vp%d %s act_w[%d] MODE 16 == 1\n", + vp->id, win->data->name, actual_w); actual_w -= 1; } } if (afbc_en && actual_w % 4) { - drm_err(vop2->drm, "vp%d %s actual_w[%d] not 4 pixel aligned\n", - vp->id, win->data->name, actual_w); + drm_dbg_kms(vop2->drm, "vp%d %s actual_w[%d] not 4 pixel aligned\n", + vp->id, win->data->name, actual_w); actual_w = ALIGN_DOWN(actual_w, 4); } @@ -1341,8 +1343,8 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, */ stride = (fb->pitches[0] << 3) / bpp; if ((stride & 0x3f) && (xmirror || rotate_90 || rotate_270)) - drm_err(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", - vp->id, win->data->name, stride); + drm_dbg_kms(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", + vp->id, win->data->name, stride); uv_swap = vop2_afbc_uv_swap(fb->format->format); /* -- GitLab From 4f537776340dab2b680a4d8554567f6884240d0b Mon Sep 17 00:00:00 2001 From: Piotr Zalewski Date: Fri, 1 Nov 2024 19:01:17 +0000 Subject: [PATCH 050/882] rockchip/drm: vop2: add support for gamma LUT Add support for gamma LUT in VOP2 driver. The implementation was inspired by one found in VOP1 driver. Blue and red channels in gamma LUT register write were swapped with respect to how gamma LUT values are written in VOP1. Gamma LUT port selection was added before the write of new gamma LUT table. If the current SoC is rk356x, check if no other CRTC has gamma LUT enabled in atomic_check (only one video port can use gamma LUT at a time) and disable gamma LUT before the LUT table write. If the current SoC isn't rk356x, "seamless" gamma lut update is performed similarly to how it was done in the case of RK3399 in VOP1[1]. In seamless update gamma LUT disable before the write isn't necessary, check if no other CRTC has gamma LUT enabled is also not necessary, different register is being used to select gamma LUT port[2] and after setting DSP_LUT_EN bit, GAMMA_UPDATE_EN bit is set[3]. Gamma size is set and drm color management is enabled for each video port's CRTC except ones which have no associated device. Patch was tested on RK3566 (Pinetab2). When using userspace tools which set eg. constant color temperature no issues were noticed. When using userspace tools which adjust eg. color temperature the slight screen flicker is visible probably because of gamma LUT disable needed in the case of RK356x before gamma LUT write. Compare behaviour of eg.: ``` gammastep -O 3000 ``` To eg.: ``` gammastep -l 53:23 -t 6000:3000 ``` In latter case color temperature is slowly adjusted at the beginning which causes screen to slighly flicker. Then it adjusts every few seconds which also causes slight screen flicker. [1] https://lists.infradead.org/pipermail/linux-rockchip/2021-October/028132.html [2] https://lore.kernel.org/linux-rockchip/48249708-8c05-40d2-a5d8-23de960c5a77@rock-chips.com/ [3] https://github.com/radxa/kernel/blob/linux-6.1-stan-rkr1/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c#L3437 Helped-by: Daniel Stone Helped-by: Dragan Simic Helped-by: Diederik de Haas Helped-by: Andy Yan Signed-off-by: Piotr Zalewski Reviewed-by: Andy Yan Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20241101185545.559090-3-pZ010001011111@proton.me --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 186 +++++++++++++++++++ drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 5 + 2 files changed, 191 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 312ca816c6468..9ad025aa9ab05 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -278,6 +278,15 @@ static u32 vop2_readl(struct vop2 *vop2, u32 offset) return val; } +static u32 vop2_vp_read(struct vop2_video_port *vp, u32 offset) +{ + u32 val; + + regmap_read(vp->vop2->map, vp->data->offset + offset, &val); + + return val; +} + static void vop2_win_write(const struct vop2_win *win, unsigned int reg, u32 v) { regmap_field_write(win->reg[reg], v); @@ -998,6 +1007,67 @@ static void vop2_disable(struct vop2 *vop2) clk_disable_unprepare(vop2->hclk); } +static bool vop2_vp_dsp_lut_is_enabled(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + return dsp_ctrl & RK3568_VP_DSP_CTRL__DSP_LUT_EN; +} + +static void vop2_vp_dsp_lut_disable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + dsp_ctrl &= ~RK3568_VP_DSP_CTRL__DSP_LUT_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static bool vop2_vp_dsp_lut_poll_disabled(struct vop2_video_port *vp) +{ + u32 dsp_ctrl; + int ret = readx_poll_timeout(vop2_vp_dsp_lut_is_enabled, vp, dsp_ctrl, + !dsp_ctrl, 5, 30 * 1000); + if (ret) { + drm_err(vp->vop2->drm, "display LUT RAM enable timeout!\n"); + return false; + } + + return true; +} + +static void vop2_vp_dsp_lut_enable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_LUT_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static void vop2_vp_dsp_lut_update_enable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + dsp_ctrl |= RK3588_VP_DSP_CTRL__GAMMA_UPDATE_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static inline bool vop2_supports_seamless_gamma_lut_update(struct vop2 *vop2) +{ + return (vop2->data->soc_id != 3566 && vop2->data->soc_id != 3568); +} + +static bool vop2_gamma_lut_in_use(struct vop2 *vop2, struct vop2_video_port *vp) +{ + const int nr_vps = vop2->data->nr_vps; + int gamma_en_vp_id; + + for (gamma_en_vp_id = 0; gamma_en_vp_id < nr_vps; gamma_en_vp_id++) + if (vop2_vp_dsp_lut_is_enabled(&vop2->vps[gamma_en_vp_id])) + break; + + return gamma_en_vp_id != nr_vps && gamma_en_vp_id != vp->id; +} + static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -1484,6 +1554,77 @@ static bool vop2_crtc_mode_fixup(struct drm_crtc *crtc, return true; } +static void vop2_crtc_write_gamma_lut(struct vop2 *vop2, struct drm_crtc *crtc) +{ + const struct vop2_video_port *vp = to_vop2_video_port(crtc); + const struct vop2_video_port_data *vp_data = &vop2->data->vp[vp->id]; + struct drm_color_lut *lut = crtc->state->gamma_lut->data; + unsigned int i, bpc = ilog2(vp_data->gamma_lut_len); + u32 word; + + for (i = 0; i < crtc->gamma_size; i++) { + word = (drm_color_lut_extract(lut[i].blue, bpc) << (2 * bpc)) | + (drm_color_lut_extract(lut[i].green, bpc) << bpc) | + drm_color_lut_extract(lut[i].red, bpc); + + writel(word, vop2->lut_regs + i * 4); + } +} + +static void vop2_crtc_atomic_set_gamma_seamless(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc) +{ + vop2_writel(vop2, RK3568_LUT_PORT_SEL, + FIELD_PREP(RK3588_LUT_PORT_SEL__GAMMA_AHB_WRITE_SEL, vp->id)); + vop2_vp_dsp_lut_enable(vp); + vop2_crtc_write_gamma_lut(vop2, crtc); + vop2_vp_dsp_lut_update_enable(vp); +} + +static void vop2_crtc_atomic_set_gamma_rk356x(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc) +{ + vop2_vp_dsp_lut_disable(vp); + vop2_cfg_done(vp); + if (!vop2_vp_dsp_lut_poll_disabled(vp)) + return; + + vop2_writel(vop2, RK3568_LUT_PORT_SEL, vp->id); + vop2_crtc_write_gamma_lut(vop2, crtc); + vop2_vp_dsp_lut_enable(vp); +} + +static void vop2_crtc_atomic_try_set_gamma(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + if (!vop2->lut_regs || !crtc_state->color_mgmt_changed) + return; + + if (!crtc_state->gamma_lut) { + vop2_vp_dsp_lut_disable(vp); + return; + } + + if (vop2_supports_seamless_gamma_lut_update(vop2)) + vop2_crtc_atomic_set_gamma_seamless(vop2, vp, crtc); + else + vop2_crtc_atomic_set_gamma_rk356x(vop2, vp, crtc); +} + +static inline void vop2_crtc_atomic_try_set_gamma_locked(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + vop2_lock(vop2); + vop2_crtc_atomic_try_set_gamma(vop2, vp, crtc, crtc_state); + vop2_unlock(vop2); +} + static void vop2_dither_setup(struct drm_crtc *crtc, u32 *dsp_ctrl) { struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state); @@ -2059,11 +2200,40 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); + vop2_crtc_atomic_try_set_gamma(vop2, vp, crtc, crtc_state); + drm_crtc_vblank_on(crtc); vop2_unlock(vop2); } +static int vop2_crtc_atomic_check_gamma(struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_atomic_state *state, + struct drm_crtc_state *crtc_state) +{ + struct vop2 *vop2 = vp->vop2; + unsigned int len; + + if (!vp->vop2->lut_regs || !crtc_state->color_mgmt_changed || + !crtc_state->gamma_lut) + return 0; + + len = drm_color_lut_size(crtc_state->gamma_lut); + if (len != crtc->gamma_size) { + drm_dbg(vop2->drm, "Invalid LUT size; got %d, expected %d\n", + len, crtc->gamma_size); + return -EINVAL; + } + + if (!vop2_supports_seamless_gamma_lut_update(vop2) && vop2_gamma_lut_in_use(vop2, vp)) { + drm_info(vop2->drm, "Gamma LUT can be enabled for only one CRTC at a time\n"); + return -EINVAL; + } + + return 0; +} + static int vop2_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -2071,6 +2241,11 @@ static int vop2_crtc_atomic_check(struct drm_crtc *crtc, struct drm_plane *plane; int nplanes = 0; struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + int ret; + + ret = vop2_crtc_atomic_check_gamma(vp, crtc, state, crtc_state); + if (ret) + return ret; drm_atomic_crtc_state_for_each_plane(plane, crtc_state) nplanes++; @@ -2489,7 +2664,13 @@ static void vop2_crtc_atomic_begin(struct drm_crtc *crtc, static void vop2_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct vop2_video_port *vp = to_vop2_video_port(crtc); + struct vop2 *vop2 = vp->vop2; + + /* In case of modeset, gamma lut update already happened in atomic enable */ + if (!drm_atomic_crtc_needs_modeset(crtc_state)) + vop2_crtc_atomic_try_set_gamma_locked(vop2, vp, crtc, crtc_state); vop2_post_config(crtc); @@ -2792,7 +2973,12 @@ static int vop2_create_crtcs(struct vop2 *vop2) } drm_crtc_helper_add(&vp->crtc, &vop2_crtc_helper_funcs); + if (vop2->lut_regs) { + const struct vop2_video_port_data *vp_data = &vop2_data->vp[vp->id]; + drm_mode_crtc_set_gamma_size(&vp->crtc, vp_data->gamma_lut_len); + drm_crtc_enable_color_mgmt(&vp->crtc, 0, false, vp_data->gamma_lut_len); + } init_completion(&vp->dsp_hold_completion); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index 615a16196aff6..510dda6f90922 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -394,6 +394,7 @@ enum dst_factor_mode { #define RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN BIT(15) #define RK3568_VP_DSP_CTRL__STANDBY BIT(31) +#define RK3568_VP_DSP_CTRL__DSP_LUT_EN BIT(28) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_MODE BIT(20) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_SEL GENMASK(19, 18) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17) @@ -408,6 +409,8 @@ enum dst_factor_mode { #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4) #define RK3568_VP_DSP_CTRL__OUT_MODE GENMASK(3, 0) +#define RK3588_VP_DSP_CTRL__GAMMA_UPDATE_EN BIT(22) + #define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2) #define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0) @@ -460,6 +463,8 @@ enum dst_factor_mode { #define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12) #define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8) +#define RK3588_LUT_PORT_SEL__GAMMA_AHB_WRITE_SEL GENMASK(13, 12) + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5) #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4) -- GitLab From e987e22e9229d70c2083a91cc61269b2c4924955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 6 Nov 2024 09:16:51 -0300 Subject: [PATCH 051/882] drm/v3d: Fix performance counter source settings on V3D 7.x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the new register addresses were introduced for V3D 7.x, we added new masks for performance counter sources on V3D 7.x. Nevertheless, we never apply these new masks when setting the sources. Fix the performance counter source settings on V3D 7.x by introducing a new macro, `V3D_SET_FIELD_VER`, which allows fields setting to vary by version. Using this macro, we can provide different values for source mask based on the V3D version, ensuring that sources are correctly configure on V3D 7.x. Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Reviewed-by: Christian Gmeiner Link: https://patchwork.freedesktop.org/patch/msgid/20241106121736.5707-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_debugfs.c | 4 ++-- drivers/gpu/drm/v3d/v3d_perfmon.c | 15 ++++++++------- drivers/gpu/drm/v3d/v3d_regs.h | 29 +++++++++++++++++------------ 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 19e3ee7ac897f..76816f2551c10 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -237,8 +237,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) if (v3d->ver >= 40) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(cycle_count_reg, - V3D_PCTR_S0)); + V3D_SET_FIELD_VER(cycle_count_reg, + V3D_PCTR_S0, v3d->ver)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 156be13ab2efb..7df6dd933c63f 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -240,17 +240,18 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) for (i = 0; i < ncounters; i++) { u32 source = i / 4; - u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + u32 channel = V3D_SET_FIELD_VER(perfmon->counters[i], V3D_PCTR_S0, + v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S1); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S2); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S3); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S3, v3d->ver); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 1b1a62ad95852..6da3c69082bd6 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -15,6 +15,14 @@ fieldval & field##_MASK; \ }) +#define V3D_SET_FIELD_VER(value, field, ver) \ + ({ \ + typeof(ver) _ver = (ver); \ + u32 fieldval = (value) << field##_SHIFT(_ver); \ + WARN_ON((fieldval & ~field##_MASK(_ver)) != 0); \ + fieldval & field##_MASK(_ver); \ + }) + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ field##_SHIFT) @@ -354,18 +362,15 @@ #define V3D_V4_PCTR_0_SRC_28_31 0x0067c #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) -# define V3D_PCTR_S0_MASK V3D_MASK(6, 0) -# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) -# define V3D_PCTR_S0_SHIFT 0 -# define V3D_PCTR_S1_MASK V3D_MASK(14, 8) -# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) -# define V3D_PCTR_S1_SHIFT 8 -# define V3D_PCTR_S2_MASK V3D_MASK(22, 16) -# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) -# define V3D_PCTR_S2_SHIFT 16 -# define V3D_PCTR_S3_MASK V3D_MASK(30, 24) -# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) -# define V3D_PCTR_S3_SHIFT 24 +# define V3D_PCTR_S0_MASK(ver) (((ver) >= 71) ? V3D_MASK(7, 0) : V3D_MASK(6, 0)) +# define V3D_PCTR_S0_SHIFT(ver) 0 +# define V3D_PCTR_S1_MASK(ver) (((ver) >= 71) ? V3D_MASK(15, 8) : V3D_MASK(14, 8)) +# define V3D_PCTR_S1_SHIFT(ver) 8 +# define V3D_PCTR_S2_MASK(ver) (((ver) >= 71) ? V3D_MASK(23, 16) : V3D_MASK(22, 16)) +# define V3D_PCTR_S2_SHIFT(ver) 16 +# define V3D_PCTR_S3_MASK(ver) (((ver) >= 71) ? V3D_MASK(31, 24) : V3D_MASK(30, 24)) +# define V3D_PCTR_S3_SHIFT(ver) 24 + #define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ -- GitLab From d84927a4ad9244c974061b6299c5169399813ddb Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 00:20:17 +0100 Subject: [PATCH 052/882] drm/bridge: cdns-mhdp8546: Remove unused functions cdns_mhdp_hdcp_set_lc() and cdns_mhdp_hdcp_set_public_key_param() were added by commit 6a3608eae6d3 ("drm: bridge: cdns-mhdp8546: Enable HDCP") but never used. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20241005232017.305217-1-linux@treblig.org --- .../drm/bridge/cadence/cdns-mhdp8546-hdcp.c | 28 ------------------- .../drm/bridge/cadence/cdns-mhdp8546-hdcp.h | 3 -- 2 files changed, 31 deletions(-) diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c index 31832ba4017f1..42248f179b69d 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c @@ -500,34 +500,6 @@ static void cdns_mhdp_hdcp_prop_work(struct work_struct *work) drm_modeset_unlock(&dev->mode_config.connection_mutex); } -int cdns_mhdp_hdcp_set_lc(struct cdns_mhdp_device *mhdp, u8 *val) -{ - int ret; - - mutex_lock(&mhdp->mbox_mutex); - ret = cdns_mhdp_secure_mailbox_send(mhdp, MB_MODULE_ID_HDCP_GENERAL, - HDCP_GENERAL_SET_LC_128, - 16, val); - mutex_unlock(&mhdp->mbox_mutex); - - return ret; -} - -int -cdns_mhdp_hdcp_set_public_key_param(struct cdns_mhdp_device *mhdp, - struct cdns_hdcp_tx_public_key_param *val) -{ - int ret; - - mutex_lock(&mhdp->mbox_mutex); - ret = cdns_mhdp_secure_mailbox_send(mhdp, MB_MODULE_ID_HDCP_TX, - HDCP2X_TX_SET_PUBLIC_KEY_PARAMS, - sizeof(*val), (u8 *)val); - mutex_unlock(&mhdp->mbox_mutex); - - return ret; -} - int cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type) { int ret; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h index 334c0b8b0d4f5..3b6ec9c3a8d8b 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h @@ -82,9 +82,6 @@ struct cdns_hdcp_tx_public_key_param { u8 E[DLP_E]; }; -int cdns_mhdp_hdcp_set_public_key_param(struct cdns_mhdp_device *mhdp, - struct cdns_hdcp_tx_public_key_param *val); -int cdns_mhdp_hdcp_set_lc(struct cdns_mhdp_device *mhdp, u8 *val); int cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type); int cdns_mhdp_hdcp_disable(struct cdns_mhdp_device *mhdp); void cdns_mhdp_hdcp_init(struct cdns_mhdp_device *mhdp); -- GitLab From cd8e9956552dd2155f6e5ae55eb9c268233c104a Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 5 Nov 2024 17:03:27 +0100 Subject: [PATCH 053/882] drm/xe: Take job list lock in xe_sched_first_pending_job Access to the pending_list should always happens under job_list_lock. Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241105160327.2970277-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 64b2ae6839db2..c250ea773491e 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -71,8 +71,14 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, static inline struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) { - return list_first_entry_or_null(&sched->base.pending_list, - struct xe_sched_job, drm.list); + struct xe_sched_job *job; + + spin_lock(&sched->base.job_list_lock); + job = list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); + spin_unlock(&sched->base.job_list_lock); + + return job; } static inline int -- GitLab From 17558f97fe62fbe14757880a0aa998bfd194ea95 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 31 Oct 2024 22:42:07 +0000 Subject: [PATCH 054/882] gpu: drm: replace of_graph_get_next_endpoint() From DT point of view, in general, drivers should be asking for a specific port number because their function is fixed in the binding. of_graph_get_next_endpoint() doesn't match to this concept. Simply replace - of_graph_get_next_endpoint(xxx, NULL); + of_graph_get_endpoint_by_regs(xxx, 0, -1); Link: https://lore.kernel.org/r/20240202174941.GA310089-robh@kernel.org Signed-off-by: Kuninori Morimoto Reviewed-by: Laurent Pinchart Reviewed-by: Dmitry Baryshkov Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/87frob3neo.wl-kuninori.morimoto.gx@renesas.com --- drivers/gpu/drm/drm_of.c | 4 +++- drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c | 2 +- drivers/gpu/drm/tiny/arcpgu.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index 5c2abc9eca9c2..5530919e0ba05 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -564,6 +564,8 @@ EXPORT_SYMBOL_GPL(drm_of_get_data_lanes_count_ep); * Gets parent DSI bus for a DSI device controlled through a bus other * than MIPI-DCS (SPI, I2C, etc.) using the Device Tree. * + * This function assumes that the device's port@0 is the DSI input. + * * Returns pointer to mipi_dsi_host if successful, -EINVAL if the * request is unsupported, -EPROBE_DEFER if the DSI host is found but * not available, or -ENODEV otherwise. @@ -576,7 +578,7 @@ struct mipi_dsi_host *drm_of_get_dsi_bus(struct device *dev) /* * Get first endpoint child from device. */ - endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1); if (!endpoint) return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 4618c892cdd65..e10e469aa7a6c 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -400,7 +400,7 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) rpi_touchscreen_i2c_write(ts, REG_POWERON, 0); /* Look up the DSI host. It needs to probe before we do. */ - endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1); if (!endpoint) return -ENODEV; diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c index 81abedec435db..b2dc02df74f89 100644 --- a/drivers/gpu/drm/tiny/arcpgu.c +++ b/drivers/gpu/drm/tiny/arcpgu.c @@ -289,7 +289,7 @@ static int arcpgu_load(struct arcpgu_drm_private *arcpgu) * There is only one output port inside each device. It is linked with * encoder endpoint. */ - endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL); + endpoint_node = of_graph_get_endpoint_by_regs(pdev->dev.of_node, 0, -1); if (endpoint_node) { encoder_node = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); -- GitLab From 90e315a58fd903e43457fa085b6644e45c047e31 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 5 Nov 2024 16:24:01 -0800 Subject: [PATCH 055/882] drm/xe/gsc: Improve SW proxy error checking and logging If an error occurs in the GSC<->CSME handshake, the GSC will send a PROXY_END msg to the driver with the status set to an error code. We currently don't check the status when receiving a PROXY_END message and instead check the proxy initialization status in the FWSTS reg; therefore, while still catching any initialization failures, we lose the actual returned error code. This can be easily improved by checking the status value and printing it to dmesg if it's an error. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20241106002402.486700-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_gsc_proxy.c | 47 +++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index fc64b45d324b9..24cc6a4f9a96a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -139,17 +139,29 @@ static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size) return 0; } -static int validate_proxy_header(struct xe_gsc_proxy_header *header, +static int validate_proxy_header(struct xe_gt *gt, + struct xe_gsc_proxy_header *header, u32 source, u32 dest, u32 max_size) { u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + int ret = 0; - if (header->destination != dest || header->source != source) - return -ENOEXEC; + if (header->destination != dest || header->source != source) { + ret = -ENOEXEC; + goto out; + } - if (length + PROXY_HDR_SIZE > max_size) - return -E2BIG; + if (length + PROXY_HDR_SIZE > max_size) { + ret = -E2BIG; + goto out; + } + + /* We only care about the status if this is a message for the driver */ + if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) { + ret = -EIO; + goto out; + } switch (type) { case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: @@ -157,12 +169,20 @@ static int validate_proxy_header(struct xe_gsc_proxy_header *header, break; fallthrough; case GSC_PROXY_MSG_TYPE_PROXY_INVALID: - return -EIO; + ret = -EIO; + break; default: break; } - return 0; +out: + if (ret) + xe_gt_err(gt, + "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n", + header->source, source, header->destination, dest, + type, length, header->status); + + return ret; } #define proxy_header_wr(xe_, map_, offset_, field_, val_) \ @@ -228,12 +248,17 @@ static int proxy_query(struct xe_gsc *gsc) xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc, reply_offset, PROXY_HDR_SIZE); - /* stop if this was the last message */ - if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) + /* Check the status and stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) { + ret = validate_proxy_header(gt, to_csme_hdr, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_KMD, + GSC_PROXY_BUFFER_SIZE - reply_offset); break; + } /* make sure the GSC-to-CSME proxy header is sane */ - ret = validate_proxy_header(to_csme_hdr, + ret = validate_proxy_header(gt, to_csme_hdr, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_BUFFER_SIZE - reply_offset); @@ -262,7 +287,7 @@ static int proxy_query(struct xe_gsc *gsc) } /* make sure the CSME-to-GSC proxy header is sane */ - ret = validate_proxy_header(gsc->proxy.from_csme, + ret = validate_proxy_header(gt, gsc->proxy.from_csme, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_BUFFER_SIZE - reply_offset); -- GitLab From 902de142150c84eaabcfee31f83c8693b40c13ca Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 8 Nov 2024 13:27:36 -0800 Subject: [PATCH 056/882] drm/xe/guc: Reduce default GuC log verbosity Drop the default verbosity from 5 (max) to 3 as the extra verbosity generally doesn't provide anything vitally important but does cause rapid log overflow. Signed-off-by: John Harrison Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241108212737.2044007-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index bfc3deebdaa20..07b27114be9af 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,7 +19,7 @@ struct xe_modparam xe_modparam = { .probe_display = true, - .guc_log_level = 5, + .guc_log_level = 3, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, /* the rest are 0 by default */ -- GitLab From 7d4d1c54c40ef8469ab46b9a6677f380f9b01e75 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 8 Nov 2024 13:27:37 -0800 Subject: [PATCH 057/882] drm/xe/guc: Support crash dump notification from GuC Add support for the two crash dump notifications from GuC. Either one means GuC is toast, so just capture state trigger a reset. Signed-off-by: John Harrison Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241108212737.2044007-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 63bd91963eb18..7eb175a0b874e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -54,6 +54,7 @@ enum { CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */ + CT_DEAD_CRASH, /* 0x8000 */ }; static void ct_dead_worker_func(struct work_struct *w); @@ -1120,6 +1121,24 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } +static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) +{ + struct xe_gt *gt = ct_to_gt(ct); + + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + xe_gt_err(gt, "GuC Crash dump notification\n"); + else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION) + xe_gt_err(gt, "GuC Exception notification\n"); + else + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action); + + CT_DEAD(ct, NULL, CRASH); + + kick_reset(ct); + + return 0; +} + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1294,6 +1313,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case GUC_ACTION_GUC2PF_ADVERSE_EVENT: ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); break; + case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: + case XE_GUC_ACTION_NOTIFY_EXCEPTION: + ret = guc_crash_process_msg(ct, action); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } -- GitLab From c8b3c6db941299d7cc31bd9befed3518fdebaf68 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 12 Nov 2024 16:28:28 +0000 Subject: [PATCH 058/882] drm/xe: handle flat ccs during hibernation on igpu Starting from LNL, CCS has moved over to flat CCS model where there is now dedicated memory reserved for storing compression state. On platforms like LNL this reserved memory lives inside graphics stolen memory, which is not treated like normal RAM and is therefore skipped by the core kernel when creating the hibernation image. Currently if something was compressed and we enter hibernation all the corresponding CCS state is lost on such HW, resulting in corrupted memory. To fix this evict user buffers from TT -> SYSTEM to ensure we take a snapshot of the raw CCS state when entering hibernation, where upon resuming we can restore the raw CCS state back when next validating the buffer. This has been confirmed to fix display corruption on LNL when coming back from hibernation. Fixes: cbdc52c11c9b ("drm/xe/xe2: Support flat ccs") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3409 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241112162827.116523-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bo_evict.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index b01bc20eb90b0..8fb2be0610035 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -35,10 +35,21 @@ int xe_bo_evict_all(struct xe_device *xe) int ret; /* User memory */ - for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + for (mem_type = XE_PL_TT; mem_type <= XE_PL_VRAM1; ++mem_type) { struct ttm_resource_manager *man = ttm_manager_type(bdev, mem_type); + /* + * On igpu platforms with flat CCS we need to ensure we save and restore any CCS + * state since this state lives inside graphics stolen memory which doesn't survive + * hibernation. + * + * This can be further improved by only evicting objects that we know have actually + * used a compression enabled PAT index. + */ + if (mem_type == XE_PL_TT && (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe))) + continue; + if (man) { ret = ttm_resource_manager_evict_all(bdev, man); if (ret) -- GitLab From 901dd2617c9c3554b2449c8844b6338009112fcf Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 6 Nov 2024 11:55:49 +0100 Subject: [PATCH 059/882] accel/ivpu: Fix Qemu crash when running in passthrough Restore PCI state after putting the NPU in D0. Restoring state before powering up the device caused a Qemu crash if NPU was running in passthrough mode and recovery was performed. Fixes: 3534eacbf101 ("accel/ivpu: Fix PCI D0 state entry in resume") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Karol Wachowski Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20241106105549.2757115-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index dbc0711e28d13..6821051dfa3a7 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -78,8 +78,8 @@ static int ivpu_resume(struct ivpu_device *vdev) int ret; retry: - pci_restore_state(to_pci_dev(vdev->drm.dev)); pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); + pci_restore_state(to_pci_dev(vdev->drm.dev)); ret = ivpu_hw_power_up(vdev); if (ret) { -- GitLab From 0fd4380c050d71334eb61067f3228a5d57172a45 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 7 Nov 2024 21:33:16 -0800 Subject: [PATCH 060/882] drm/xe: Wait on killed exec queues When an exec queue is killed it triggers an async process of asking the GuC to schedule the context out. The timestamp in the context image is only updated when this process completes. In case a userspace process kills an exec and tries to read the timestamp, it may not get an updated runtime. Add synchronization between the process reading the fdinfo and the exec queue being killed. After reading all the timestamps, wait on exec queues in the process of being killed. When that wait is over, xe_exec_queue_fini() was already called and updated the timestamps. v2: Do not update pending_removal before validating user args (Matthew Auld) v3: Move wait on pending to be done before getting any timestamp so it's more likely for the gpu and exec queue timestamps to be closer together Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2667 Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241108053318.3483678-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_types.h | 5 +++++ drivers/gpu/drm/xe/xe_drm_client.c | 7 +++++++ drivers/gpu/drm/xe/xe_exec_queue.c | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index bccca63c8a48c..fffbb7d1c40b4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -607,6 +607,11 @@ struct xe_file { * which does things while being held. */ struct mutex lock; + /** + * @exec_queue.pending_removal: items pending to be removed to + * synchronize GPU state update with ongoing query. + */ + atomic_t pending_removal; } exec_queue; /** @run_ticks: hw engine class run time in ticks for this drm client */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 22f0f1a6dfd55..dd4e16a84874c 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -280,6 +280,13 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) u64 gpu_timestamp; unsigned int fw_ref; + /* + * Wait for any exec queue going away: their cycles will get updated on + * context switch out, so wait for that to happen + */ + wait_var_event(&xef->exec_queue.pending_removal, + !atomic_read(&xef->exec_queue.pending_removal)); + xe_pm_runtime_get(xe); /* Accumulate all the exec queues from this client */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fd0f3b3c9101d..ff556773c1063 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -262,8 +262,11 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) /* * Before releasing our ref to lrc and xef, accumulate our run ticks + * and wakeup any waiters. */ xe_exec_queue_update_run_ticks(q); + if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) + wake_up_var(&q->xef->exec_queue.pending_removal); for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); @@ -826,7 +829,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->exec_queue.lock); q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); + if (q) + atomic_inc(&xef->exec_queue.pending_removal); mutex_unlock(&xef->exec_queue.lock); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; -- GitLab From db696095b08fb7186fedb93ce216f67121ec9b44 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 7 Nov 2024 21:33:17 -0800 Subject: [PATCH 061/882] drm/xe: Sample gpu timestamp closer to exec queues Move the force_wake_get to the beginning of the function so the gpu timestamp can be closer to the sampled value for exec queues. This avoids additional delays waiting for force wake ack which can make the proportion between cycles/total_cycles fluctuate around the real value. For a gputop-like application getting 2 samples to calculate the utilization: sample 0: read_exec_queue_timestamp <<<< (A) read_gpu_timestamp sample 1: read_exec_queue_timestamp <<<<< (B) read_gpu_timestamp In the above case, utilization can be bigger or smaller than it should be, depending on if (A) or (B) receives additional delay, respectively. With this a LNL system that was failing on `xe_drm_fdinfo --r utilization-single-full-load` after ~60 iterations, get to run to 100 without a failure. This is still not perfect, and it's easy to introduce errors by just loading the CPU with `stress --cpu $(nproc)` - the same igt test in this case fails after 2 or 3 iterations. That will be dealt with in the test itself, using a longer sampling period. v2: Rename function and add another to get "any engine", preparing for caching the hwe in future (Umesh / Jonathan) Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241108053318.3483678-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 73 ++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index dd4e16a84874c..298a587da7f17 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -269,6 +269,49 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) } } +static struct xe_hw_engine *any_engine(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned long gt_id; + + for_each_gt(gt, xe, gt_id) { + struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt); + + if (hwe) + return hwe; + } + + return NULL; +} + +static bool force_wake_get_any_engine(struct xe_device *xe, + struct xe_hw_engine **phwe, + unsigned int *pfw_ref) +{ + enum xe_force_wake_domains domain; + unsigned int fw_ref; + struct xe_hw_engine *hwe; + struct xe_force_wake *fw; + + hwe = any_engine(xe); + if (!hwe) + return false; + + domain = xe_hw_engine_to_fw_domain(hwe); + fw = gt_to_fw(hwe->gt); + + fw_ref = xe_force_wake_get(fw, domain); + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { + xe_force_wake_put(fw, fw_ref); + return false; + } + + *phwe = hwe; + *pfw_ref = fw_ref; + + return true; +} + static void show_run_ticks(struct drm_printer *p, struct drm_file *file) { unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; @@ -288,6 +331,10 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) !atomic_read(&xef->exec_queue.pending_removal)); xe_pm_runtime_get(xe); + if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) { + xe_pm_runtime_put(xe); + return; + } /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); @@ -302,33 +349,11 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) } mutex_unlock(&xef->exec_queue.lock); - /* Get the total GPU cycles */ - for_each_gt(gt, xe, gt_id) { - enum xe_force_wake_domains fw; - - hwe = xe_gt_any_hw_engine(gt); - if (!hwe) - continue; - - fw = xe_hw_engine_to_fw_domain(hwe); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), fw); - if (!xe_force_wake_ref_has_domain(fw_ref, fw)) { - hwe = NULL; - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); + xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref); xe_pm_runtime_put(xe); - if (unlikely(!hwe)) - return; - for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { const char *class_name; -- GitLab From b107c63d2953907908fd0cafb0e543b3c3167b75 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 8 Nov 2024 19:20:03 -0800 Subject: [PATCH 062/882] drm/xe/oa: Fix "Missing outer runtime PM protection" warning Fix the following drm_WARN: [953.586396] xe 0000:00:02.0: [drm] Missing outer runtime PM protection ... <4> [953.587090] ? xe_pm_runtime_get_noresume+0x8d/0xa0 [xe] <4> [953.587208] guc_exec_queue_add_msg+0x28/0x130 [xe] <4> [953.587319] guc_exec_queue_fini+0x3a/0x40 [xe] <4> [953.587425] xe_exec_queue_destroy+0xb3/0xf0 [xe] <4> [953.587515] xe_oa_release+0x9c/0xc0 [xe] Suggested-by: John Harrison Suggested-by: Matthew Brost Fixes: e936f885f1e9 ("drm/xe/oa/uapi: Expose OA stream fd") Cc: stable@vger.kernel.org Signed-off-by: Ashutosh Dixit Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241109032003.3093811-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fd2ffe8df1561..8dd55798ab312 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1614,9 +1614,11 @@ static int xe_oa_release(struct inode *inode, struct file *file) struct xe_oa_stream *stream = file->private_data; struct xe_gt *gt = stream->gt; + xe_pm_runtime_get(gt_to_xe(gt)); mutex_lock(>->oa.gt_lock); xe_oa_destroy_locked(stream); mutex_unlock(>->oa.gt_lock); + xe_pm_runtime_put(gt_to_xe(gt)); /* Release the reference the OA stream kept on the driver */ drm_dev_put(>_to_xe(gt)->drm); -- GitLab From 9d42476f71a9a000ea2d72aae5f4d43c5061fe10 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 13 Nov 2024 17:22:12 +0100 Subject: [PATCH 063/882] drm/xe: Allow fault injection in vm create and vm bind IOCTLs Use fault injection infrastructure to allow specific functions to be configured over debugfs for failing during the execution of xe_vm_create_ioctl() and xe_vm_bind_ioctl(). This allows more thorough testing from user space by going through code paths for error handling and unwinding which cannot be reached by simply injecting errors in IOCTL arguments. This can help increase code robustness. v2: Add xe_pt_update_ops_{prepare,run} (Matthew Brost) Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241113162212.2154103-1-francois.dugast@intel.com Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 + drivers/gpu/drm/xe/xe_pt.c | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ff556773c1063..aab9e561153dc 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -240,6 +240,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, return q; } +ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); void xe_exec_queue_destroy(struct kref *ref) { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f27f579f4d85a..684dc075deac8 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -136,6 +136,7 @@ err_kfree: xe_pt_free(pt); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO); /** * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero @@ -1851,6 +1852,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) return 0; } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2131,6 +2133,7 @@ kill_vm_tile1: return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO); /** * xe_pt_update_ops_fini() - Finish PT update operations diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 624133fae5f55..2e67648ed5121 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -740,6 +740,7 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) return 0; } +ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); static void xe_vma_ops_fini(struct xe_vma_ops *vops) { @@ -1352,6 +1353,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, return 0; } +ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); static void xe_vm_free_scratch(struct xe_vm *vm) { @@ -1978,6 +1980,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, return ops; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, u16 pat_index, unsigned int flags) @@ -2697,6 +2700,7 @@ unlock: drm_exec_fini(&exec); return err; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); #define SUPPORTED_FLAGS_STUB \ (DRM_XE_VM_BIND_FLAG_READONLY | \ -- GitLab From 68634b12d769831dcf317f2736665fce6f895655 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 12 Nov 2024 18:01:23 +0100 Subject: [PATCH 064/882] drm/xe: Ignore GGTT TLB inval errors during GT reset During GT reset, GGTT TLB invalidations may fail. This is acceptable as the reset will clear GGTT caches. Suppress only -ECANCELED other return codes are still unexpected error. v2: Add code comment(Matt). Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3389 Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241112170123.1236443-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 3cb228c773cd4..4e9f893921605 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -253,9 +253,18 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, 0, /* seqno, replaced in send_tlb_invalidation */ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; + int ret; + + ret = send_tlb_invalidation(>->uc.guc, fence, action, + ARRAY_SIZE(action)); + /* + * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches + * should be nuked on a GT reset so this error can be ignored. + */ + if (ret == -ECANCELED) + return 0; - return send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); + return ret; } /** -- GitLab From 10304796918a1d771f9bd187af6520eea0479bc1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:16 -0800 Subject: [PATCH 065/882] drm/xe: Add xe_ring_lrc_is_idle() helper Add helper to compare ring head and tail to determine if LRC is idle. v2: - Fix kernel doc (CI, Zhanjun) Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 13 +++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c70b75a3eb85f..663ad4d97b34c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1696,7 +1696,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) ban = true; } } else if (xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { + !xe_lrc_ring_is_idle(q->lrc[0])) { ban = true; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 4b65da77c6e09..e0d80e8201ebf 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1763,3 +1763,16 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) return lrc->ctx_timestamp; } + +/** + * xe_lrc_ring_is_idle() - LRC is idle + * @lrc: Pointer to the lrc. + * + * Compare LRC ring head and tail to determine if idle. + * + * Return: True is ring is idle, False otherwise + */ +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) +{ + return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 40d8f6906d3e4..9d64cedc4d147 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -78,6 +78,8 @@ u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); + u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); -- GitLab From 37aa19fa724548d84285a5e358c5ec179b4a43cc Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:17 -0800 Subject: [PATCH 066/882] drm/xe: Add ring address to LRC snapshot The ring is currently in LRC BO but this may change going forward. Include the ring address in the snapshot protecting again any future changes. v2: - s/ring_desc/ring_addr (Jonathan) Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 3 +++ drivers/gpu/drm/xe/xe_lrc.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index e0d80e8201ebf..cc77e51321575 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1636,6 +1636,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; @@ -1693,6 +1694,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Ring address: 0x%08x\n", + snapshot->ring_addr); drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 9d64cedc4d147..37ca321ed492b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -25,6 +25,7 @@ struct xe_lrc_snapshot { unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 ring_addr; u32 indirect_context_desc; u32 head; struct { -- GitLab From 9a1fce9df46d8b0789666522d78ffe1495add270 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:18 -0800 Subject: [PATCH 067/882] drm/xe: Add ring start to LRC snapshot Add LRC ring start register to LRC snapshot to verify no LRC register corruption upon hang. This could be possible if the indirect ring state was mapped to user space or via an internal KMD memory corruption. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index cc77e51321575..22e58c6e2a35e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1061,6 +1061,14 @@ u32 xe_lrc_ring_tail(struct xe_lrc *lrc) return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; } +static u32 xe_lrc_ring_start(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1641,6 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; snapshot->tail.memory = xe_lrc_ring_tail(lrc); + snapshot->start = xe_lrc_ring_start(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1701,6 +1710,7 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", snapshot->tail.internal, snapshot->tail.memory); + drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 37ca321ed492b..b459dcab87870 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -28,6 +28,7 @@ struct xe_lrc_snapshot { u32 ring_addr; u32 indirect_context_desc; u32 head; + u32 start; struct { u32 internal; u32 memory; -- GitLab From f62e6edfc11d30d07ce48d783da149dda4f7e78f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:19 -0800 Subject: [PATCH 068/882] drm/xe: Add exec queue param to devcoredump During capture time, the target job may be unavailable (e.g., if it's in LR mode). However, the associated exec queue will be available regardless, so add an exec queue param for such cases. v2: - Reword commit message (Jonathan) Cc: Zhanjun Dong Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 15 +++++++++------ drivers/gpu/drm/xe/xe_devcoredump.h | 6 ++++-- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d3570d3d573c2..c32cbb46ef8cb 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -238,10 +238,10 @@ static void xe_devcoredump_free(void *data) } static void devcoredump_snapshot(struct xe_devcoredump *coredump, + struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; - struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); u32 adj_logical_mask = q->logical_mask; u32 width_mask = (0x1 << q->width) - 1; @@ -278,10 +278,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); ss->ge = xe_guc_exec_queue_snapshot_capture(q); - ss->job = xe_sched_job_snapshot_capture(job); + if (job) + ss->job = xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); - xe_engine_snapshot_capture_for_job(job); + if (job) + xe_engine_snapshot_capture_for_job(job); queue_work(system_unbound_wq, &ss->work); @@ -291,15 +293,16 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, /** * xe_devcoredump - Take the required snapshots and initialize coredump device. + * @q: The faulty xe_exec_queue, where the issue was detected. * @job: The faulty xe_sched_job, where the issue was detected. * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. */ -void xe_devcoredump(struct xe_sched_job *job) +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job) { - struct xe_device *xe = gt_to_xe(job->q->gt); + struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; if (coredump->captured) { @@ -308,7 +311,7 @@ void xe_devcoredump(struct xe_sched_job *job) } coredump->captured = true; - devcoredump_snapshot(coredump, job); + devcoredump_snapshot(coredump, q, job); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index a4eebc285fc83..c04a534e3384e 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -10,13 +10,15 @@ struct drm_printer; struct xe_device; +struct xe_exec_queue; struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_sched_job *job); +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job); int xe_devcoredump_init(struct xe_device *xe); #else -static inline void xe_devcoredump(struct xe_sched_job *job) +static inline void xe_devcoredump(struct xe_exec_queue *q, + struct xe_sched_job *job) { } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 663ad4d97b34c..08a6578ee104c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1154,7 +1154,7 @@ trigger_reset: trace_xe_sched_job_timedout(job); if (!exec_queue_killed(q)) - xe_devcoredump(job); + xe_devcoredump(q, job); /* * Kernel jobs should never fail, nor should VM jobs if they do -- GitLab From 990c29c2dd22e46395831bcff7b06a6ca9ee0573 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:20 -0800 Subject: [PATCH 069/882] drm/xe: Improve schedule disable response failure Print Guc ID and take devcoredump on schedule disable response failure. GuC ID is useful information and a schedule disable response failure is possible the LRC state is corrupted so a devcoredump is helpful to debug. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 08a6578ee104c..46fd4621bfca2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1124,7 +1124,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret || xe_guc_read_stopped(guc)) { trigger_reset: if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); + xe_gt_warn(guc_to_gt(guc), + "Schedule disable failed to respond, guc_id=%d", + q->guc->id); + xe_devcoredump(q, job); set_exec_queue_extra_ref(q); xe_exec_queue_get(q); /* GT reset owns this */ set_exec_queue_banned(q); -- GitLab From a54b0de7ed72ca8e0012061c580b8447973eb82e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:21 -0800 Subject: [PATCH 070/882] drm/xe: Change xe_engine_snapshot_capture_for_job to be for queue During capture time, the target job may be unavailable (e.g., if it's in LR mode). However, the associated exec queue will be available regardless, change xe_engine_snapshot_capture_for_job to take a queue argument ann rename to xe_engine_snapshot_capture_for_queue. v2: - Reword commit message (Jonathan) - Remove redundant queueu check (Zhanjun) - Remove devcoredump job member (Zhanjun) Cc: Zhanjun Dong Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 5 +--- drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 -- drivers/gpu/drm/xe/xe_guc_capture.c | 29 ++++++++++------------- drivers/gpu/drm/xe/xe_guc_capture.h | 6 ++--- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 8 +++---- drivers/gpu/drm/xe/xe_hw_engine.h | 4 ++-- 7 files changed, 24 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index c32cbb46ef8cb..0e5edf14a241d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -232,7 +232,6 @@ static void xe_devcoredump_free(void *data) /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); coredump->captured = false; - coredump->job = NULL; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); } @@ -259,7 +258,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, strscpy(ss->process_name, process_name); ss->gt = q->gt; - coredump->job = job; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); @@ -282,8 +280,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->job = xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); - if (job) - xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); queue_work(system_unbound_wq, &ss->work); diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 3703ddea12528..be4d59ea9ac8f 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -80,8 +80,6 @@ struct xe_devcoredump { bool captured; /** @snapshot: Snapshot is captured at time of the first crash */ struct xe_devcoredump_snapshot snapshot; - /** @job: Point to the faulting job */ - struct xe_sched_job *job; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index cc72446a5de11..cd6d80b5b51da 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1793,29 +1793,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm } /** - * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job. - * @job: The job object. + * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue. + * @q: The exec queue object * - * Search within the capture outlist for the job, could be used for check if - * GuC capture is ready for the job. + * Search within the capture outlist for the queue, could be used for check if + * GuC capture is ready for the queue. * If found, the locked boolean of the node will be flagged. * * Returns: found guc-capture node ptr else NULL */ struct __guc_capture_parsed_output * -xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) +xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - struct xe_exec_queue *q; struct xe_device *xe; u16 guc_class = GUC_LAST_ENGINE_CLASS + 1; struct xe_devcoredump_snapshot *ss; - if (!job) - return NULL; - - q = job->q; if (!q || !q->gt) return NULL; @@ -1827,7 +1822,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC) return ss->matched_node; - /* Find hwe for the job */ + /* Find hwe for the queue */ for_each_hw_engine(hwe, q->gt, id) { if (hwe != q->hwe) continue; @@ -1859,17 +1854,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) } /** - * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine - * @job: The job object + * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine + * @q: The exec queue object * * Take snapshot of associated HW Engine * * Returns: None. */ void -xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) +xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) { - struct xe_exec_queue *q = job->q; struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; struct xe_hw_engine *hwe; @@ -1887,11 +1881,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) } if (!coredump->snapshot.hwe[id]) { - coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job); + coredump->snapshot.hwe[id] = + xe_hw_engine_snapshot_capture(hwe, q); } else { struct __guc_capture_parsed_output *new; - new = xe_guc_capture_get_matching_and_lock(job); + new = xe_guc_capture_get_matching_and_lock(q); if (new) { struct xe_guc *guc = &q->gt->uc.guc; diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h index 97a795d13dd13..20a078dc4b856 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.h +++ b/drivers/gpu/drm/xe/xe_guc_capture.h @@ -11,10 +11,10 @@ #include "xe_guc.h" #include "xe_guc_fwif.h" +struct xe_exec_queue; struct xe_guc; struct xe_hw_engine; struct xe_hw_engine_snapshot; -struct xe_sched_job; static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class) { @@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc); const struct __guc_mmio_reg_descr_group * xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type, enum guc_capture_list_class_type capture_class, bool is_ext); -struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job); +struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q); void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot); void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); -void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job); +void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q); void xe_guc_capture_steered_list_init(struct xe_guc *guc); void xe_guc_capture_put_matched_nodes(struct xe_guc *guc); int xe_guc_capture_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 46fd4621bfca2..985a21a72da48 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1061,13 +1061,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * do manual capture first and decide later if we need to use it */ if (!exec_queue_killed(q) && !xe->devcoredump.captured && - !xe_guc_capture_get_matching_and_lock(job)) { + !xe_guc_capture_get_matching_and_lock(q)) { /* take force wake before engine register manual capture */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); - xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); xe_force_wake_put(gt_to_fw(q->gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1557acee3523f..0cfa2b9282be4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -829,7 +829,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. - * @job: The job object. + * @q: The exec queue object. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -838,7 +838,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) * caller, using `xe_hw_engine_snapshot_free`. */ struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) { struct xe_hw_engine_snapshot *snapshot; struct __guc_capture_parsed_output *node; @@ -864,9 +864,9 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) return snapshot; - if (job) { + if (q) { /* If got guc capture, set source to GuC */ - node = xe_guc_capture_get_matching_and_lock(job); + node = xe_guc_capture_get_matching_and_lock(q); if (node) { struct xe_device *xe = gt_to_xe(hwe->gt); struct xe_devcoredump *coredump = &xe->devcoredump; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index da0a6922a26f3..6b5f9fa2a5948 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -11,7 +11,7 @@ struct drm_printer; struct drm_xe_engine_class_instance; struct xe_device; -struct xe_sched_job; +struct xe_exec_queue; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job); +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q); void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); -- GitLab From dbf670f13949249f09b872ae59aab2feb259ca5f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 13 Nov 2024 18:25:22 -0800 Subject: [PATCH 071/882] drm/xe: Wire devcoredump to LR TDR LR queues can hang, cause engine reset, or cause IOMMU CAT errors. Collect an error capture when this occurs. v2: - s/queue's/queues (Jonathan) v4: - Fix build (CI) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 985a21a72da48..f9ecee5364d82 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -896,13 +896,18 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) !exec_queue_pending_disable(q) || xe_guc_read_stopped(guc), HZ * 5); if (!ret) { - xe_gt_warn(q->gt, "Schedule disable failed to respond\n"); + xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); + xe_devcoredump(q, NULL); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; } } + if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) + xe_devcoredump(q, NULL); + xe_sched_submission_start(sched); } -- GitLab From ec26ea94b990963229bffef88dea2831e884dc09 Mon Sep 17 00:00:00 2001 From: Raphael Gallais-Pou Date: Tue, 29 Oct 2024 19:30:41 +0100 Subject: [PATCH 072/882] MAINTAINERS: add Raphael Gallais-Pou to DRM/STi maintainers Add myself as a maintainer for STi driver changes. Signed-off-by: Raphael Gallais-Pou Acked-by: Alain Volmat Link: https://patchwork.freedesktop.org/patch/msgid/20241029183041.137414-1-rgallaispou@gmail.com Signed-off-by: Alain Volmat --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f5dc00438ba39..08e1f793ee455 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7723,6 +7723,7 @@ F: drivers/gpu/drm/rockchip/ DRM DRIVERS FOR STI M: Alain Volmat +M: Raphael Gallais-Pou L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git -- GitLab From 86caee745e4506528801d9542db54e7b4c4d834b Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 21 Jun 2024 22:17:55 +0200 Subject: [PATCH 073/882] drm/rockchip: analogix_dp: allow to work without panel When the DP output is routed to a external connector there is no need for a fixed panel, as the panel may be detected via EDID on the AUX channel. Allow to continue probing if no panel reference is present. Signed-off-by: Lucas Stach Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240621201755.500271-1-l.stach@pengutronix.de --- drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index d3341edfe4f4f..956b1be78b378 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -386,7 +386,7 @@ static int rockchip_dp_probe(struct platform_device *pdev) return -ENODEV; ret = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, NULL); - if (ret < 0) + if (ret < 0 && ret != -ENODEV) return ret; dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); -- GitLab From ae4a812a64dad3fd4f7bbcd7af215cb68af8cf8c Mon Sep 17 00:00:00 2001 From: Langyan Ye Date: Wed, 13 Nov 2024 17:00:22 +0800 Subject: [PATCH 074/882] drm/panel-edp: Add KDB KD116N2130B12 Add support for the KDB KD116N2130B12, pleace the EDID here for subsequent reference. 00 ff ff ff ff ff ff 00 2c 82 07 17 00 00 00 00 1c 21 01 04 95 1a 0e 78 0a 63 25 99 5b 5d 96 26 18 50 54 00 00 00 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 87 1b 56 88 50 00 0e 30 28 20 55 00 00 90 10 00 00 18 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fe 00 4b 44 31 31 36 4e 32 31 33 30 42 31 32 00 17 Signed-off-by: Langyan Ye Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20241113090022.332586-1-yelangyan@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-edp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 8566e9cf2f82a..d42c8496e80c2 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1802,6 +1802,12 @@ static const struct panel_delay delay_200_500_e50_po2e200 = { .powered_on_to_enable = 200, }; +static const struct panel_delay delay_200_150_e50 = { + .hpd_absent = 200, + .unprepare = 150, + .enable = 50, +}; + #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ .ident = { \ @@ -1963,6 +1969,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('K', 'D', 'B', 0x1118, &delay_200_500_e50, "KD116N29-30NK-A005"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x1212, &delay_200_500_e50, "KD116N0930A16"), + EDP_PANEL_ENTRY('K', 'D', 'B', 0x1707, &delay_200_150_e50, "KD116N2130B12"), EDP_PANEL_ENTRY('K', 'D', 'C', 0x044f, &delay_200_500_e50, "KD116N9-30NH-F3"), EDP_PANEL_ENTRY('K', 'D', 'C', 0x05f1, &delay_200_500_e80_d50, "KD116N5-30NV-G7"), -- GitLab From 223217b800c2509c2f4c9c98e46c2090e44dd12b Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Thu, 7 Nov 2024 13:38:41 -0800 Subject: [PATCH 075/882] drm/xe/guc: Remove duplicate source field xe_hw_engine_snapshot.source save the information of where data copied from. Because the 'source' field is already populated inside 'matched_node' ptr hanging off xe_devcoredump_snapshot, which happenned either in guc_capture_extract_reglists or xe_engine_manual_capture, we can remove this redundant copy of 'source' from xe_hw_engine_snapshot. Signed-off-by: Zhanjun Dong Reviewed-by: Alan Previn Changes from prior revs: v2:- Update commit message Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20241107213841.436384-1-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc_capture.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 2 -- drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index cd6d80b5b51da..f87755af545f8 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1759,7 +1759,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm if (!devcore_snapshot->matched_node) return; - xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC); xe_gt_assert(gt, snapshot->hwe); capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class); @@ -1768,7 +1767,8 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm snapshot->name ? snapshot->name : "", snapshot->logical_instance); drm_printf(p, "\tCapture_source: %s\n", - snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual"); + devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? + "GuC" : "Manual"); drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", snapshot->forcewake.domain, snapshot->forcewake.ref); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 0cfa2b9282be4..c4b0dc3be39c3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -872,7 +872,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) struct xe_devcoredump *coredump = &xe->devcoredump; coredump->snapshot.matched_node = node; - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); return snapshot; } @@ -880,7 +879,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) /* otherwise, do manual capture */ xe_engine_manual_capture(hwe, snapshot); - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); return snapshot; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 719f27ef00a56..e14bee95e3643 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -165,8 +165,6 @@ enum xe_hw_engine_snapshot_source_id { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @source: Data source, either manual or GuC */ - enum xe_hw_engine_snapshot_source_id source; /** @hwe: hw engine */ struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ -- GitLab From a49e7e8125a112d5dc63ff024191cd264c6260a8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 8 Nov 2024 16:42:37 +0100 Subject: [PATCH 076/882] drm: Move client code to clients/ subdirectory Just move some files around to keep source code well organized. Plus fix a type in the help text of CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20241108154600.126162-2-tzimmermann@suse.de --- drivers/gpu/drm/Kconfig | 72 +----------------- drivers/gpu/drm/Makefile | 9 +-- drivers/gpu/drm/clients/Kconfig | 73 +++++++++++++++++++ drivers/gpu/drm/clients/Makefile | 5 ++ .../gpu/drm/{ => clients}/drm_client_setup.c | 0 .../gpu/drm/{ => clients}/drm_fbdev_client.c | 0 6 files changed, 80 insertions(+), 79 deletions(-) create mode 100644 drivers/gpu/drm/clients/Kconfig create mode 100644 drivers/gpu/drm/clients/Makefile rename drivers/gpu/drm/{ => clients}/drm_client_setup.c (100%) rename drivers/gpu/drm/{ => clients}/drm_fbdev_client.c (100%) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5504721007cc1..fd294ccca6bb7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -217,77 +217,7 @@ config DRM_CLIENT option. Drivers that support the default clients should select DRM_CLIENT_SELECTION instead. -config DRM_CLIENT_LIB - tristate - depends on DRM - select DRM_KMS_HELPER if DRM_FBDEV_EMULATION - select FB_CORE if DRM_FBDEV_EMULATION - help - This option enables the DRM client library and selects all - modules and components according to the enabled clients. - -config DRM_CLIENT_SELECTION - tristate - depends on DRM - select DRM_CLIENT_LIB if DRM_FBDEV_EMULATION - help - Drivers that support in-kernel DRM clients have to select this - option. - -config DRM_CLIENT_SETUP - bool - depends on DRM_CLIENT_SELECTION - help - Enables the DRM client selection. DRM drivers that support the - default clients should select DRM_CLIENT_SELECTION instead. - -menu "Supported DRM clients" - depends on DRM_CLIENT_SELECTION - -config DRM_FBDEV_EMULATION - bool "Enable legacy fbdev support for your modesetting driver" - depends on DRM_CLIENT_SELECTION - select DRM_CLIENT - select DRM_CLIENT_SETUP - select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE - default FB - help - Choose this option if you have a need for the legacy fbdev - support. Note that this support also provides the linux console - support on top of your modesetting driver. - - If in doubt, say "Y". - -config DRM_FBDEV_OVERALLOC - int "Overallocation of the fbdev buffer" - depends on DRM_FBDEV_EMULATION - default 100 - help - Defines the fbdev buffer overallocation in percent. Default - is 100. Typical values for double buffering will be 200, - triple buffering 300. - -config DRM_FBDEV_LEAK_PHYS_SMEM - bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)" - depends on DRM_FBDEV_EMULATION && EXPERT - default n - help - In order to keep user-space compatibility, we want in certain - use-cases to keep leaking the fbdev physical address to the - user-space program handling the fbdev buffer. - This affects, not only, Amlogic, Allwinner or Rockchip devices - with ARM Mali GPUs using an userspace Blob. - This option is not supported by upstream developers and should be - removed as soon as possible and be considered as a broken and - legacy behaviour from a modern fbdev device driver. - - Please send any bug reports when using this to your proprietary - software vendor that requires this. - - If in doubt, say "N" or spread the word to your closed source - library vendor. - -endmenu +source "drivers/gpu/drm/clients/Kconfig" config DRM_LOAD_EDID_FIRMWARE bool "Allow to specify an EDID data set instead of probing for it" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 463afad1b5ca6..4f6585be14ccf 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -148,14 +148,6 @@ drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o -# -# DRM clients -# - -drm_client_lib-y := drm_client_setup.o -drm_client_lib-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_client.o -obj-$(CONFIG_DRM_CLIENT_LIB) += drm_client_lib.o - # # Drivers and the rest # @@ -165,6 +157,7 @@ obj-y += tests/ obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-y += arm/ +obj-y += clients/ obj-y += display/ obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_SCHED) += scheduler/ diff --git a/drivers/gpu/drm/clients/Kconfig b/drivers/gpu/drm/clients/Kconfig new file mode 100644 index 0000000000000..01ad3b0001303 --- /dev/null +++ b/drivers/gpu/drm/clients/Kconfig @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_CLIENT_LIB + tristate + depends on DRM + select DRM_KMS_HELPER if DRM_FBDEV_EMULATION + select FB_CORE if DRM_FBDEV_EMULATION + help + This option enables the DRM client library and selects all + modules and components according to the enabled clients. + +config DRM_CLIENT_SELECTION + tristate + depends on DRM + select DRM_CLIENT_LIB if DRM_FBDEV_EMULATION + help + Drivers that support in-kernel DRM clients have to select this + option. + +config DRM_CLIENT_SETUP + bool + depends on DRM_CLIENT_SELECTION + help + Enables the DRM client selection. DRM drivers that support the + default clients should select DRM_CLIENT_SELECTION instead. + +menu "Supported DRM clients" + depends on DRM_CLIENT_SELECTION + +config DRM_FBDEV_EMULATION + bool "Enable legacy fbdev support for your modesetting driver" + depends on DRM_CLIENT_SELECTION + select DRM_CLIENT + select DRM_CLIENT_SETUP + select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE + default FB + help + Choose this option if you have a need for the legacy fbdev + support. Note that this support also provides the linux console + support on top of your modesetting driver. + + If in doubt, say "Y". + +config DRM_FBDEV_OVERALLOC + int "Overallocation of the fbdev buffer" + depends on DRM_FBDEV_EMULATION + default 100 + help + Defines the fbdev buffer overallocation in percent. Default + is 100. Typical values for double buffering will be 200, + triple buffering 300. + +config DRM_FBDEV_LEAK_PHYS_SMEM + bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)" + depends on DRM_FBDEV_EMULATION && EXPERT + default n + help + In order to keep user-space compatibility, we want in certain + use-cases to keep leaking the fbdev physical address to the + user-space program handling the fbdev buffer. + This affects, not only, Amlogic, Allwinner or Rockchip devices + with ARM Mali GPUs using a userspace Blob. + This option is not supported by upstream developers and should be + removed as soon as possible and be considered as a broken and + legacy behaviour from a modern fbdev device driver. + + Please send any bug reports when using this to your proprietary + software vendor that requires this. + + If in doubt, say "N" or spread the word to your closed source + library vendor. + +endmenu diff --git a/drivers/gpu/drm/clients/Makefile b/drivers/gpu/drm/clients/Makefile new file mode 100644 index 0000000000000..1d004ec92e1ea --- /dev/null +++ b/drivers/gpu/drm/clients/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +drm_client_lib-y := drm_client_setup.o +drm_client_lib-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_client.o +obj-$(CONFIG_DRM_CLIENT_LIB) += drm_client_lib.o diff --git a/drivers/gpu/drm/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c similarity index 100% rename from drivers/gpu/drm/drm_client_setup.c rename to drivers/gpu/drm/clients/drm_client_setup.c diff --git a/drivers/gpu/drm/drm_fbdev_client.c b/drivers/gpu/drm/clients/drm_fbdev_client.c similarity index 100% rename from drivers/gpu/drm/drm_fbdev_client.c rename to drivers/gpu/drm/clients/drm_fbdev_client.c -- GitLab From b86711c6d6e20eb945fe878de98ef7c9be2c2088 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 8 Nov 2024 16:42:38 +0100 Subject: [PATCH 077/882] drm/client: Move public client header to clients/ subdirectory Move the public header file drm_client_setup.h to the clients/ subdirectory and update all drivers. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20241108154600.126162-3-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/arm/display/komeda/komeda_drv.c | 2 +- drivers/gpu/drm/arm/hdlcd_drv.c | 2 +- drivers/gpu/drm/arm/malidp_drv.c | 2 +- drivers/gpu/drm/armada/armada_drv.c | 2 +- drivers/gpu/drm/aspeed/aspeed_gfx_drv.c | 2 +- drivers/gpu/drm/ast/ast_drv.c | 2 +- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 2 +- drivers/gpu/drm/clients/drm_client_setup.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_drv.c | 2 +- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 2 +- drivers/gpu/drm/gma500/psb_drv.c | 2 +- drivers/gpu/drm/gud/gud_drv.c | 2 +- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 2 +- drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 2 +- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 +- drivers/gpu/drm/imx/dcss/dcss-kms.c | 2 +- drivers/gpu/drm/imx/ipuv3/imx-drm-core.c | 2 +- drivers/gpu/drm/imx/lcdc/imx-lcdc.c | 2 +- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 2 +- drivers/gpu/drm/kmb/kmb_drv.c | 2 +- drivers/gpu/drm/logicvc/logicvc_drm.c | 2 +- drivers/gpu/drm/loongson/lsdc_drv.c | 2 +- drivers/gpu/drm/mcde/mcde_drv.c | 2 +- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 2 +- drivers/gpu/drm/meson/meson_drv.c | 2 +- drivers/gpu/drm/mgag200/mgag200_drv.c | 2 +- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/mxsfb/lcdif_drv.c | 2 +- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- drivers/gpu/drm/omapdrm/omap_fbdev.c | 2 +- drivers/gpu/drm/pl111/pl111_drv.c | 2 +- drivers/gpu/drm/qxl/qxl_drv.c | 2 +- drivers/gpu/drm/radeon/radeon_drv.c | 2 +- drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c | 2 +- drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c | 2 +- drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c | 2 +- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 2 +- drivers/gpu/drm/solomon/ssd130x.c | 2 +- drivers/gpu/drm/sti/sti_drv.c | 2 +- drivers/gpu/drm/stm/drv.c | 2 +- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- drivers/gpu/drm/tegra/drm.c | 2 +- drivers/gpu/drm/tidss/tidss_drv.c | 2 +- drivers/gpu/drm/tilcdc/tilcdc_drv.c | 2 +- drivers/gpu/drm/tiny/arcpgu.c | 3 ++- drivers/gpu/drm/tiny/bochs.c | 2 +- drivers/gpu/drm/tiny/cirrus.c | 2 +- drivers/gpu/drm/tiny/gm12u320.c | 2 +- drivers/gpu/drm/tiny/hx8357d.c | 2 +- drivers/gpu/drm/tiny/ili9163.c | 2 +- drivers/gpu/drm/tiny/ili9225.c | 2 +- drivers/gpu/drm/tiny/ili9341.c | 2 +- drivers/gpu/drm/tiny/ili9486.c | 2 +- drivers/gpu/drm/tiny/mi0283qt.c | 2 +- drivers/gpu/drm/tiny/ofdrm.c | 2 +- drivers/gpu/drm/tiny/panel-mipi-dbi.c | 2 +- drivers/gpu/drm/tiny/repaper.c | 2 +- drivers/gpu/drm/tiny/sharp-memory.c | 2 +- drivers/gpu/drm/tiny/simpledrm.c | 2 +- drivers/gpu/drm/tiny/st7586.c | 2 +- drivers/gpu/drm/tiny/st7735r.c | 2 +- drivers/gpu/drm/tve200/tve200_drv.c | 2 +- drivers/gpu/drm/udl/udl_drv.c | 2 +- drivers/gpu/drm/vboxvideo/vbox_drv.c | 2 +- drivers/gpu/drm/vc4/vc4_drv.c | 2 +- drivers/gpu/drm/virtio/virtgpu_drv.c | 2 +- drivers/gpu/drm/vkms/vkms_drv.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- drivers/gpu/drm/xlnx/zynqmp_kms.c | 2 +- include/drm/{ => clients}/drm_client_setup.h | 0 72 files changed, 72 insertions(+), 71 deletions(-) rename include/drm/{ => clients}/drm_client_setup.h (100%) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6ac7d335e28eb..92e39eef29a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c index 6d475bb34002e..b694c90d43905 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include "komeda_dev.h" diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index cd4389809d42d..4af8fce7dab7c 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -22,8 +22,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 4cb25004b84fe..38b1b5b6bf69d 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -16,9 +16,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 5c26f0409478a..ddf0eaf546a69 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -11,8 +11,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c index 109023815fa2c..55e90ab9b231b 100644 --- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c +++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c @@ -13,8 +13,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 4afe4be072efc..8e9c9cb89a50e 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -31,8 +31,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 792dcc19e8e7a..506289a937e25 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -16,9 +16,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/clients/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c index c14221ca5a0d8..c6a295d5de50c 100644 --- a/drivers/gpu/drm/clients/drm_client_setup.c +++ b/drivers/gpu/drm/clients/drm_client_setup.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT -#include +#include #include #include #include diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 2a466d8179f47..7e517a2ec6817 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -13,9 +13,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 91a48d774cf7d..12b2096b6c9d5 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -18,8 +18,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index c419ebbc49ec4..2c56ece17d717 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -19,8 +19,8 @@ #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index 09ccdc1dc1a25..65a16f009ea6b 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -13,9 +13,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 8c488c98ac97f..69bdae6049826 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -15,8 +15,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c index 86a3a1faff495..7e9c2e3ceee2c 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c @@ -17,8 +17,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index e0953777a2066..c5ac240f7a7a8 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -9,8 +9,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c index 63a335c62296e..490b5c269f39d 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-kms.c +++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c @@ -3,11 +3,11 @@ * Copyright 2019 NXP. */ +#include #include #include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c index ced06bd8eae8b..6b84429735d6f 100644 --- a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c +++ b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c @@ -13,9 +13,9 @@ #include