From 34b1c1c71d375835d589e0929299b8caf0171cf1 Mon Sep 17 00:00:00 2001 From: Mikolaj Wasiak Date: Tue, 4 Mar 2025 09:43:26 +0100 Subject: [PATCH 001/899] i915/selftest/igt_mmap: let mmap tests run in kthread When the driver is loaded on the system with numa nodes it might be run in a kthread, which makes it impossible to use current->mm in the selftest. This patch allows the selftest to use current->mm by using active_mm. Signed-off-by: Mikolaj Wasiak Reviewed-by: Eugene Kobyak Reviewed-by: Krzysztof Niemiec Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/2w6pt2hnemndwmanwhyn3keexa6vtha7rmo6rqoerkmyxhbrh2@ls7lndjpia6z --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 99a9ade739562..2ef2f3db5bcc8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1837,6 +1837,8 @@ static int igt_mmap_revoke(void *arg) int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { + int ret; + bool unuse_mm = false; static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), SUBTEST(igt_smoke_tiling), @@ -1848,5 +1850,15 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_mmap_gpu), }; - return i915_live_subtests(tests, i915); + if (!current->mm) { + kthread_use_mm(current->active_mm); + unuse_mm = true; + } + + ret = i915_live_subtests(tests, i915); + + if (unuse_mm) + kthread_unuse_mm(current->active_mm); + + return ret; } -- GitLab From 73782fc64793f6e6e0118797d3b9044436f2e8e9 Mon Sep 17 00:00:00 2001 From: Mikolaj Wasiak Date: Tue, 11 Mar 2025 10:33:14 +0100 Subject: [PATCH 002/899] i915/gt/selftests: Disable lrc_timestamp test This test was designed to isolate a bug in tigerlake and dg2 
hardware. The bug was found and fixed in newer generations. Since we won't support any new hardware with this driver, the test should now be turned off in the CI to not pollute it with random failures on previous hardware. For reference, the issue has been discussed here[*]. [*] https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13697 Signed-off-by: Mikolaj Wasiak Reviewed-by: Chris Wilson Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/uxxb22n667zb3aic6zs4mr2krv5zavav5v2zjgqnhnabgxgzif@4icszicjakex --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e17b8777d21dc..8051123108e51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_internal.h" +#include "i915_drv.h" #include "i915_selftest.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -858,6 +859,14 @@ static int live_lrc_timestamp(void *arg) U32_MAX, }; + /* + * This test was designed to isolate a hardware bug. + * The bug was found and fixed in future generations but + * now the test pollutes our CI on previous generation. + */ + if (GRAPHICS_VER(gt->i915) == 12) + return 0; + /* * We want to verify that the timestamp is saved and restore across * context switches and is monotonic. -- GitLab From b6aa4b8b3ebf502898a8164bd7d1aa7d5870c1b9 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 14 Mar 2025 03:12:22 +0100 Subject: [PATCH 003/899] drm/i915/gt: Fix SPDX license format Header files need to declare the SPDX under /* ... */ style comments at the beginning of the file. 
Signed-off-by: Andi Shyti Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Link: https://patchwork.freedesktop.org/patch/msgid/20250314021225.11813-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_wopcm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h index 17d6aa86008a9..d2038b6de5e74 100644 --- a/drivers/gpu/drm/i915/gt/intel_wopcm.h +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017-2018 Intel Corporation */ -- GitLab From 5bebf804effe3dd85e01a71d6a1fd82f4a1b436f Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 14 Mar 2025 03:12:23 +0100 Subject: [PATCH 004/899] drm/i915/gt: Remove trailing blank lines Remove useless blank lines before and after the brackets. Signed-off-by: Andi Shyti Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Link: https://patchwork.freedesktop.org/patch/msgid/20250314021225.11813-4-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.c | 1 - drivers/gpu/drm/i915/gt/intel_lrc.c | 1 - drivers/gpu/drm/i915/gt/intel_mocs.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 30b128b1fde7c..afbc5c7693089 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -176,7 +176,6 @@ static void clear_vm_list(struct list_head *list) i915_vma_destroy_locked(vma); i915_gem_object_put(obj); } - } } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51847a846002e..c481b56fa67d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -751,7 +751,6 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if 
(GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) /* * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index d791d63d49b49..96e7eb086a497 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -314,7 +314,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), /* WB - L3 */ -- GitLab From 5ba97b5925229ab9211c72de7ff40283685876a3 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 14 Mar 2025 03:12:24 +0100 Subject: [PATCH 005/899] drm/i915/gt: Use proper sleeping functions for timeouts shorter than 20ms msleep is very imprecise for timeouts shorter than 20 milliseconds and most probably will sleep longer. Use usleep_range() instead. Signed-off-by: Andi Shyti Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Link: https://patchwork.freedesktop.org/patch/msgid/20250314021225.11813-5-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_rc6.c | 3 ++- drivers/gpu/drm/i915/gt/selftest_tlb.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 27b6d51ef1454..1f8bc5ac20c25 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -60,7 +60,8 @@ int live_rc6_manual(void *arg) /* Force RC6 off for starters */ __intel_rc6_disable(rc6); - msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + /* wakeup is not immediate, takes about 100us on icl */ + usleep_range(1000, 2000); res[0] = rc6_residency(rc6); diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 3941f2d6fa47d..69ed946a39e5c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -143,7 +143,7 @@ 
pte_tlbinv(struct intel_context *ce, if (ce->engine->class == OTHER_CLASS) msleep(200); else - msleep(10); + usleep_range(10000, 20000); if (va == vb) { if (!i915_request_completed(rq)) { -- GitLab From 0a4ae87706c6d15d14648e428c3a76351f823e48 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 10 Mar 2025 20:58:21 +0530 Subject: [PATCH 006/899] drm/i915: Disable RPG during live selftest The Forcewake timeout issue has been observed on Gen 12.0 and above. To address this, disable Render Power-Gating (RPG) during live self-tests for these generations. The temporary workaround 'drm/i915/mtl: do not enable render power-gating on MTL' disables RPG globally, which is unnecessary since the issues were only seen during self-tests. v2: take runtime pm wakeref Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9413 Fixes: 25e7976db86b ("drm/i915/mtl: do not enable render power-gating on MTL") Cc: Rodrigo Vivi Cc: Andi Shyti Cc: Andrzej Hajda Signed-off-by: Badal Nilawar Signed-off-by: Sk Anirban Reviewed-by: Karthik Poosa Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20250310152821.2931678-1-sk.anirban@intel.com --- drivers/gpu/drm/i915/gt/intel_rc6.c | 19 ++++--------------- .../gpu/drm/i915/selftests/i915_selftest.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9378d5901c493..9ca42589da4da 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. 
- */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index fee76c1d2f450..889281819c5b1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,7 +23,9 @@ #include +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_regs.h" #include "gt/uc/intel_gsc_fw.h" #include "i915_driver.h" @@ -253,11 +255,27 @@ int i915_mock_selftests(void) int i915_live_selftests(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_uncore *uncore = &i915->uncore; int err; + u32 pg_enable; + intel_wakeref_t wakeref; if (!i915_selftest.live) return 0; + /* + * FIXME Disable render powergating, this is temporary wa and should be removed + * after fixing real cause of forcewake timeouts. + */ + with_intel_runtime_pm(uncore->rpm, wakeref) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 74))) { + pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + if (pg_enable & GEN9_RENDER_PG_ENABLE) + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, + pg_enable & ~GEN9_RENDER_PG_ENABLE); + } + } + __wait_gsc_proxy_completed(i915); __wait_gsc_huc_load_completed(i915); -- GitLab From c235b03145b97b8eb94af03e99740833e20c955d Mon Sep 17 00:00:00 2001 From: Sk Anirban Date: Fri, 28 Mar 2025 00:49:24 +0530 Subject: [PATCH 007/899] drm/i915/selftests: Refactor RC6 power measurement and error handling Revise the power measurement logic to save and evaluate energy values. 
Previously, the test only checked whether the system had entered the RC6 state, without considering any potential interruptions in that state. This update introduces a threshold check to ensure that the GPU remains in the RC6 state properly during the specified sleep duration. v3: - Reorder threshold check (Badal) v4: - Improved commit message (Anshuman) v5: - Rename variables for improved readability (Anshuman) Signed-off-by: Sk Anirban Reviewed-by: Badal Nilawar Signed-off-by: Anshuman Gupta Link: https://lore.kernel.org/r/20250327191924.4131598-1-sk.anirban@intel.com --- drivers/gpu/drm/i915/gt/selftest_rc6.c | 51 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 1f8bc5ac20c25..aa8274dee668c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -33,15 +33,22 @@ int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; struct intel_rc6 *rc6 = &gt->rc6; - u64 rc0_power, rc6_power; + struct intel_rps *rps = &gt->rps; intel_wakeref_t wakeref; + u64 rc0_sample_energy[2]; + u64 rc6_sample_energy[2]; + u64 sleep_time = 1000; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + u64 rc0_power; + u64 rc6_power; bool has_power; + u64 threshold; ktime_t dt; u64 res[2]; int err = 0; - u32 rc0_freq = 0; - u32 rc6_freq = 0; - struct intel_rps *rps = &gt->rps; + u64 diff; + /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. 
@@ -66,9 +73,9 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); dt = ktime_get(); - rc0_power = librapl_energy_uJ(); - msleep(1000); - rc0_power = librapl_energy_uJ() - rc0_power; + rc0_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); + rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); rc0_freq = intel_rps_read_actual_frequency_fw(rps); @@ -80,11 +87,12 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, + rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1], ktime_to_ns(dt)); + if (!rc0_power) { if (rc0_freq) - pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n", + pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n", rc0_freq); else pr_err("No power and freq measured while in RC0\n"); @@ -99,10 +107,10 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); - rc6_power = librapl_energy_uJ(); - msleep(1000); + rc6_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); rc6_freq = intel_rps_read_actual_frequency_fw(rps); - rc6_power = librapl_energy_uJ() - rc6_power; + rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if (res[1] == res[0]) { @@ -114,13 +122,24 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, + rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1], ktime_to_ns(dt)); - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n", rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6! 
GPU Freq: %u in RC6 and %u in RC0\n", - rc6_freq, rc0_freq); + pr_err("GPU leaked energy while in RC6!\n" + "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n" + "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n" + "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n", + rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1], + rc6_sample_energy[0], rc6_sample_energy[1]); + + diff = res[1] - res[0]; + threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10; + if (diff < threshold) + pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n", + res[0], res[1]); err = -EINVAL; goto out_unlock; } -- GitLab From af30b9a97452104e3e229bbfb445e2a1d679f672 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 22:46:19 +0300 Subject: [PATCH 008/899] drm/i915/gsc: delete a stray tab in intel_gsc_fw_get_binary_info() This line is indented one tab too far. Delete the extra tab. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/6152e1ac-745d-4b38-ba49-f013e6760936@stanley.mountain Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 5dc0ccd076363..d550eb6edfb8e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -230,7 +230,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x", gsc->release.major, gsc->release.minor, gsc->release.patch, gsc->release.build); - return -EINVAL; + return -EINVAL; } if (min_ver.major) { -- GitLab From 4e22d5b8ba499905721329f89675795f4ca488b2 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 28 Mar 2025 00:26:29 +0100 Subject: [PATCH 009/899] drm/i915/gem: Convert SPDX headers to single-line format 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace multi-line SPDX license headers with single-line equivalents (// SPDX-License-Identifier: MIT or /* ... */ for headers), as preferred by current kernel coding style. Signed-off-by: Andi Shyti Reviewed-by: Nitin Gote Acked-by: Jani Nikula Reviewed-by: Mikołaj Wasiak Link: https://patchwork.freedesktop.org/patch/msgid/20250327232629.2939-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_context.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_ioctls.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_mman.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_pm.h | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 5 ++--- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gemfs.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gemfs.h | 3 +-- 29 files changed, 30 insertions(+), 57 deletions(-) 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index ddda468241ef9..6e4d0ce3952f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 7d97ea2a653e4..c4854c5b4e0fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c3829731295..9d7ee15799001 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c0543c35cd6ac..e17c3a6723cb5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2011-2012 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index e5b0f66ea1feb..6e682a6a0574e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 
b6d97da63d1fa..c476732ad45f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 9473050ac8425..05e440643aa2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright 2012 Red Hat Inc */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 3770828f2eaf2..fab1d277e2ab4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f151640c1d13c..02c59808cbe48 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008,2010 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ea7561ae6e136..232b984f60b61 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index 28d6526e32ab0..8044d34707b68 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c3dabb8579605..f6d37dff320d4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h index 196417fd0f5c4..946fb9825eb33 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 356530b599ce5..1f38e367c60b5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2017 Intel Corporation * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bb713e096db28..c25737285b128 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 68413c05c8122..64600aa8227f0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel 
Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8780aa2431053..7f83f8bdc8fbb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index ef85c6dc9fd59..f9e7cab140f8c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 900c083379425..f0857c5c96df3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index bedf1e95941aa..bd5bd2c5e7f91 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index ae3343c81a645..3b81cc3ad5be7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 9117e94228440..8e051625f23cb 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2015 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 9d958a6f377e5..3380151edfc18 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2012 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index af85d0c281686..8814cbcde5b59 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index d9eb84c1d2f11..84b7d394f3ddf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 09b68713ab329..307a18eede723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2012-2014 Intel Corporation * - * Based on amdgpu_mn, which bears the following notice: + * Based on amdgpu_mn, which bears the following notice: * * Copyright 2014 Advanced Micro Devices, Inc. * All Rights Reserved. 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1f55e62044a4b..7127e90c1a8f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 46b9a17d6abc6..65d84a93c5253 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h index 5d835e44c4f6e..16d4333c9a4eb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.h +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017 Intel Corporation */ -- GitLab From f5b444361435bfc9eeaaae3dd2e9b4f18dae3888 Mon Sep 17 00:00:00 2001 From: Andrew Ballance Date: Sun, 16 Mar 2025 06:16:44 -0500 Subject: [PATCH 010/899] gpu: nova-core: remove completed Vec extensions from task list The requested Vec methods have been implemented; thus, remove the completed item from the nova task list. Link: https://lore.kernel.org/r/20250316111644.154602-4-andrewjballance@gmail.com Signed-off-by: Andrew Ballance Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/todo.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index ca08377d3b73f..234d753d3eacc 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -190,16 +190,6 @@ Rust abstraction for debugfs APIs. 
| Reference: Export GSP log buffers | Complexity: Intermediate -Vec extensions --------------- - -Implement ``Vec::truncate`` and ``Vec::resize``. - -Currently this is used for some experimental code to parse the vBIOS. - -| Reference vBIOS support -| Complexity: Beginner - GPU (general) ============= -- GitLab From 9cdd9c4d039eae77142a27ffeaee76a5d5a9294f Mon Sep 17 00:00:00 2001 From: Leo Zeng Date: Tue, 25 Feb 2025 15:59:59 -0500 Subject: [PATCH 011/899] drm/amd/display: Add override for visual confirm [WHY] We want to allow the display manager to override the visual confirm color in DC when required. [HOW] Add new visual confirm mode VISUAL_CONFIRM_EXPLICIT, check mode before setting visual confirm color. Reviewed-by: Aric Cyr Signed-off-by: Leo Zeng Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403df..5a44f5da68dcc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1192,6 +1192,12 @@ static void apply_ctx_interdependent_lock(struct dc *dc, static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { + if (dc->debug.visual_confirm & VISUAL_CONFIRM_EXPLICIT) { + memcpy(&pipe_ctx->visual_confirm_color, &pipe_ctx->plane_state->visual_confirm_color, + sizeof(pipe_ctx->visual_confirm_color)); + return; + } + if (dc->ctx->dce_version >= DCN_VERSION_1_0) { memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color)); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c2ee05269260..78dd4d1f51dca 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -496,6 +496,7 @@ enum visual_confirm { 
VISUAL_CONFIRM_HW_CURSOR = 20, VISUAL_CONFIRM_VABC = 21, VISUAL_CONFIRM_DCC = 22, + VISUAL_CONFIRM_EXPLICIT = 0x80000000, }; enum dc_psr_power_opts { -- GitLab From c05da59e979bfe2d6555c7097e012b433a35d1b8 Mon Sep 17 00:00:00 2001 From: Leo Zeng Date: Wed, 26 Feb 2025 14:35:05 -0500 Subject: [PATCH 012/899] drm/amd/display: Get visual confirm color for stream [WHY] We want to output visual confirm color based on stream. [HOW] If visual confirm is for DMUB, use DMUB to get color. Otherwise, find plane with highest layer index, output visual confirm color of pipe that contains plane with highest index. Reviewed-by: Aric Cyr Signed-off-by: Leo Zeng Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 45 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 5 +++ .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 1 - 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5a44f5da68dcc..be63cc4aca1f5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1231,6 +1231,51 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte } } +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color) +{ + struct dc_stream_status *stream_status = dc_stream_get_status(stream_state); + struct pipe_ctx *pipe_ctx; + int i; + struct dc_plane_state *plane_state = NULL; + + if (!stream_status) + return; + + switch (dc->debug.visual_confirm) { + case VISUAL_CONFIRM_DISABLE: + return; + case VISUAL_CONFIRM_PSR: + case VISUAL_CONFIRM_FAMS: + pipe_ctx = dc_stream_get_pipe_ctx(stream_state); + if (!pipe_ctx) + return; + dc_dmub_srv_get_visual_confirm_color_cmd(dc, pipe_ctx); + memcpy(color, &dc->ctx->dmub_srv->dmub->visual_confirm_color, sizeof(struct tg_color)); + return; + + 
default: + /* find plane with highest layer_index */ + for (i = 0; i < stream_status->plane_count; i++) { + if (stream_status->plane_states[i]->visible) + plane_state = stream_status->plane_states[i]; + } + if (!plane_state) + return; + /* find pipe that contains plane with highest layer index */ + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == plane_state) { + memcpy(color, &pipe->visual_confirm_color, sizeof(struct tg_color)); + return; + } + } + } +} + static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 78dd4d1f51dca..be2518e07c14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2590,6 +2590,11 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); /* DSC Interfaces */ #include "dc_dsc.h" +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color); + /* Disable acc mode Interfaces */ void dc_disable_accelerated_mode(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 912f96323ed6a..e34a93b703a7a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2664,7 +2664,6 @@ void dcn10_update_visual_confirm_color(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; if (mpc->funcs->set_bg_color) { - memcpy(&pipe_ctx->plane_state->visual_confirm_color, &(pipe_ctx->visual_confirm_color), sizeof(struct tg_color)); mpc->funcs->set_bg_color(mpc, &(pipe_ctx->visual_confirm_color), mpcc_id); } } -- GitLab From 7a505a844c7aeeb9e5180f05c9714a47bcad5395 Mon Sep 17 00:00:00 2001 From: Cruise Date: Thu, 6 Mar 2025 10:17:48 +0800 Subject: [PATCH 013/899] 
drm/amd/display: Remove BW Allocation from DPIA notification [Why] USB4 BW Allocation response will be handled in HPD IRQ. No need to handle it in DPIA notification callback. [How] Remove DP BW allocation response code in DPIA notification. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Cruise Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 4 ---- .../drm/amd/display/dmub/src/dmub_srv_stat.c | 17 ----------------- 2 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 4e0efff92dcaf..80595786341f9 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -566,10 +566,6 @@ struct dmub_notification { struct aux_reply_data aux_reply; enum dp_hpd_status hpd_status; enum set_config_status sc_status; - /** - * DPIA notification command. - */ - struct dmub_rb_cmd_dpia_notification dpia_notification; struct dmub_rb_cmd_hpd_sense_notify_data hpd_sense_notify; }; }; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index cce887cefc017..1c33857aa513c 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -95,23 +95,6 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, case DMUB_OUT_CMD__DPIA_NOTIFICATION: notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION; notify->link_index = cmd.dpia_notification.payload.header.instance; - - if (cmd.dpia_notification.payload.header.type == DPIA_NOTIFY__BW_ALLOCATION) { - - notify->dpia_notification.payload.data.dpia_bw_alloc.estimated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.estimated_bw; - notify->dpia_notification.payload.data.dpia_bw_alloc.allocated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.allocated_bw; - - if 
(cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_failed) - notify->result = DPIA_BW_REQ_FAILED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_succeeded) - notify->result = DPIA_BW_REQ_SUCCESS; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.est_bw_changed) - notify->result = DPIA_EST_BW_CHANGED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_alloc_cap_changed) - notify->result = DPIA_BW_ALLOC_CAPS_CHANGED; - } break; case DMUB_OUT_CMD__HPD_SENSE_NOTIFY: notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY; -- GitLab From 76468055069ce2912eb78a76f0e0648c4f674f51 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Fri, 7 Mar 2025 13:36:53 -0500 Subject: [PATCH 014/899] drm/amd/display: DML21 Reintegration [Why] To bring in latest changes in DML21 [List of Changes] - Unification of DML logging to use DML_LOG_* macro - Clean up variables that are exclusively used for logging Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 3 - .../amd/display/dc/dml2/dml21/inc/dml_top.h | 1 + .../dml2/dml21/inc/dml_top_dchub_registers.h | 2 + .../dml21/inc/dml_top_display_cfg_types.h | 3 +- .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 10 +- .../src/dml2_core/dml2_core_dcn4_calcs.c | 3083 ++++++++--------- .../src/dml2_core/dml2_core_shared_types.h | 2 + .../dml21/src/dml2_core/dml2_core_utils.c | 134 +- .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 4 + .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 8 +- .../dml2/dml21/src/dml2_top/dml2_top_soc15.c | 2 +- .../dc/dml2/dml21/src/inc/dml2_debug.c | 31 - .../dc/dml2/dml21/src/inc/dml2_debug.h | 77 +- 13 files changed, 1633 insertions(+), 1727 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 21fd466dba26e..157ecf008d6cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -99,7 +99,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math. CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) @@ -117,11 +116,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_floa CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags) DML21 := src/dml2_top/dml2_top_interfaces.o DML21 += src/dml2_top/dml2_top_soc15.o -DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a64ec4dcf11ab..c047d56527c47 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -43,4 +43,5 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o */ bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + #endif diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 25b607e7b726e..84c90050668c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -156,6 +156,8 @@ struct dml2_dchub_watermark_regs { uint32_t urgent; uint32_t sr_enter; uint32_t sr_exit; + uint32_t sr_enter_z8; + uint32_t sr_exit_z8; uint32_t uclk_pstate; uint32_t fclk_pstate; uint32_t temp_read_or_ppt; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index 5e1ab6d976404..255f05de362cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -166,7 +166,7 @@ struct dml2_surface_cfg { enum dml2_swizzle_mode tiling; struct { - unsigned long pitch; + unsigned long pitch; // In elements, two pixels per element in 422 packed format unsigned long width; unsigned long height; } plane0; @@ -385,6 +385,7 @@ struct dml2_plane_parameters { long reserved_vblank_time_ns; unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay unsigned int gpuvm_min_page_size_kbytes; + unsigned int hostvm_min_page_size_kbytes; enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index bb863c8c6b39c..6ee37386f6728 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -456,10 +456,10 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) 
in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + 
DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); @@ -509,7 +509,7 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; - dml2_printf("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); if (!((stream_bitmask >> stream_index) & 0x1)) { in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 4c504cb0e1c53..5d91f195397a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -54,104 +54,104 @@ static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: 
DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + 
DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: 
NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: 
InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", 
support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + 
DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", 
support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) @@ -179,11 +179,9 @@ static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg } else { out_bpp[k] = 0; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); } } @@ -212,9 +210,7 @@ static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const stru num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); -#endif + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); return num_active_pipes; } @@ -251,7 +247,7 @@ static 
bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); - dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); return is_phantom; } @@ -415,19 +411,17 @@ static void CalculateMaxDETAndMinCompressedBufferSize( *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); - dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: 
nomDETInKByte = %u\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); if (nomDETInKByteOverrideEnable) { *nomDETInKByte = nomDETInKByteOverrideValue; - dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); } } @@ -502,7 +496,7 @@ static bool dml_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -535,7 +529,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; } } @@ -570,8 +564,8 @@ static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) { version = 11; } else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -645,21 +639,19 @@ static void CalculateBytePerPixelAndBlockSizes( *BytePerPixelY = 2; *BytePerPixelC = 4; } else { - dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML_ASSERT(0); } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); - dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); - dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); - dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y); - dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c); - dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); - dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); -#endif + DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); + DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); + DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); if (dml_get_gfx_version(SurfaceTiling) == 11) { *surf_linear128_l = 0; @@ -703,12 +695,10 @@ static void CalculateBytePerPixelAndBlockSizes( *BlockWidth256BytesY = 
256U / *BytePerPixelY / *BlockHeight256BytesY; *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); - dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); - dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); - dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); -#endif + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); if (dml_get_gfx_version(SurfaceTiling) == 11) { if (SurfaceTiling == dml2_gfx11_sw_linear) { @@ -752,8 +742,8 @@ static void CalculateBytePerPixelAndBlockSizes( } else if (SurfaceTiling == dml2_sw_256kb_2d) { macro_tile_scale = 32; } else { - dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! 
val=%u\n", SurfaceTiling); + DML_ASSERT(0); } *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; @@ -766,12 +756,10 @@ static void CalculateBytePerPixelAndBlockSizes( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); - dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); - dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); - dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); -#endif + DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); } static void CalculateSinglePipeDPPCLKAndSCLThroughput( @@ -860,10 +848,8 @@ static void CalculateSwathWidth( unsigned int surface_width_ub_c; unsigned int surface_height_ub_c; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -872,11 +858,9 @@ static void CalculateSwathWidth( SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, 
display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); MainSurfaceODMMode = ODMMode[k]; @@ -899,13 +883,11 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); - dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); - dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); - dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { SwathWidthC[k] = SwathWidthY[k] / 2; @@ -934,22 +916,20 @@ static void CalculateSwathWidth( surface_width_ub_c = (unsigned 
int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); - dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); - dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); - dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + 
DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); req_per_swath_ub_l[k] = 0; req_per_swath_ub_c[k] = 0; @@ -995,15 +975,12 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); -#endif - + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, 
MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); } } @@ -1018,13 +995,11 @@ static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsi if (unb_req_force_en) { unb_req_en = unb_req_force_val && unb_req_ok; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); - dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); - dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); - dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); -#endif - return (unb_req_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); + return unb_req_en; } static void CalculateDETBufferSize( @@ -1054,16 +1029,14 @@ static void CalculateDETBufferSize( bool NextPotentialSurfaceToAssignDETPieceFound; bool MinimizeReallocationSuccess = false; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, 
MinCompressedBufferSizeInKByte); - dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); // Note: Will use default det size if that fits 2 swaths if (UnboundedRequestEnabled) { @@ -1092,19 +1065,15 @@ static void CalculateDETBufferSize( l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); - dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); - dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", 
__func__, k, full_swath_bytes_c[k]); if (l->minDET_pipe == 0) { l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); } if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { @@ -1117,12 +1086,10 @@ static void CalculateDETBufferSize( l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); } if (display_cfg->minimize_det_reallocation) { @@ -1194,14 +1161,12 @@ static void CalculateDETBufferSize( l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", 
__func__); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); } - dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__); -#endif - dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); + DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { @@ -1213,10 +1178,8 @@ static void CalculateDETBufferSize( } else { DETPieceAssignedToThisSurfaceAlready[k] = false; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); } for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { @@ -1224,22 +1187,18 @@ static void CalculateDETBufferSize( l->NextSurfaceToAssignDETPiece = 0; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 
ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { l->NextSurfaceToAssignDETPiece = k; NextPotentialSurfaceToAssignDETPieceFound = true; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); } if (NextPotentialSurfaceToAssignDETPieceFound) { @@ -1249,20 +1208,16 @@ static void 
CalculateDETBufferSize( * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); - dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); - dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); - dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); - dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + 
DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; @@ -1274,13 +1229,11 @@ static void CalculateDETBufferSize( } *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); } -#endif } static double CalculateRequiredDispclk( @@ -1510,15 +1463,13 @@ static unsigned int dscceComputeDelay( //pixel delay is group_delay 
(converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: bpc: %u\n", __func__, bpc); - dml2_printf("DML::%s: BPP: %f\n", __func__, BPP); - dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); - dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices); - dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Output: %u\n", __func__, Output); - dml2_printf("DML::%s: pixels: %u\n", __func__, pixels); -#endif + DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); + DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); + DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); + DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); return pixels; } @@ -1593,10 +1544,8 @@ static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, e // sft Delay = Delay + 1; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Delay = %u\n", __func__, Delay); -#endif + DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); return Delay; } @@ -1667,10 +1616,8 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ } meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); - dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); -#endif + 
DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); if (p->GPUVMEnable == true) { double meta_vmpg_bytes = 4.0 * 1024.0; *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); @@ -1724,25 +1671,23 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); - dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); - dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); - dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); - dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); - dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); - dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); - dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); - dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); - dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); - dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); - dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); - dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, 
vp_height_dpte_ub); -#endif + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); if (p->SurfaceTiling == dml2_sw_linear) { *p->PixelPTEReqHeight = 1; @@ -1778,22 +1723,20 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); if (p->GPUVMEnable == true) { - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", __func__, 
p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); - DML2_ASSERT(0); + DML_ASSERT(0); } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); - dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); - dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); - dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); - dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); -#endif + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); @@ -1811,7 +1754,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_height_linear = 128; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u 
(linear)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); #endif } else if (!dml_is_vertical_rotation(p->RotationAngle)) { @@ -1825,7 +1768,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); #endif *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; @@ -1840,7 +1783,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); #endif } @@ -1852,18 +1795,18 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); - dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); - dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); - dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", 
__func__, *p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); - dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); - dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); - dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); - dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); #endif return vm_bytes; @@ -1894,12 +1837,12 @@ static unsigned int CalculatePrefetchSourceLines( double numLines = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, 
VRatio); - dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps); - dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); - dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); - dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); - dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); + DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); #endif if (ProgressiveToInterlaceUnitInOPP) *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); @@ -1934,11 +1877,11 @@ static unsigned int CalculatePrefetchSourceLines( numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); - dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); - dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); - dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); - dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); + DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); #endif return (unsigned int)(numLines); @@ -2007,8 +1950,8 @@ static void CalculateMALLUseForStaticScreen( if (is_using_mall_for_ss[k]) 
TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); - dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); #endif } @@ -2022,7 +1965,7 @@ static void CalculateMALLUseForStaticScreen( (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { CanAddAnotherSurfaceToMALL = true; SurfaceToAddToMALL = k; - dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); } } if (CanAddAnotherSurfaceToMALL) { @@ -2030,8 +1973,8 @@ static void CalculateMALLUseForStaticScreen( TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); - dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); #endif } } @@ -2203,15 +2146,15 @@ static void CalculateDCCConfiguration( segment_order_vert_contiguous_chroma = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); 
- dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); - dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); - dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); - dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); - dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); - dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); + DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); #endif if (DCCProgrammingAssumesScanDirectionUnknown == true) { if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { @@ -2301,12 +2244,12 @@ static void CalculateDCCConfiguration( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); - dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); - dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); - dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, 
*MaxUncompressedBlockChroma); - dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); - dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); #endif } @@ -2326,26 +2269,26 @@ static void calculate_mcache_row_bytes( unsigned int mvmpg_per_mcache; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans); - dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); - dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); - dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); - dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); - dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); - dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); - dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); - dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); - dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); - dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width); - dml2_printf("DML::%s: blk_height = %u\n", __func__, 
p->blk_height); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); - dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); -#endif - DML2_ASSERT(p->mcache_line_size_bytes != 0); - DML2_ASSERT(p->mcache_size_bytes != 0); + DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); + DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); + DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); +#endif + DML_ASSERT(p->mcache_line_size_bytes != 0); + DML_ASSERT(p->mcache_size_bytes != 0); *p->mvmpg_width = 0; *p->mvmpg_height = 0; @@ -2370,8 +2313,8 @@ static void calculate_mcache_row_bytes( *p->mvmpg_width = p->vmpg_width; *p->mvmpg_height = p->vmpg_height; } else if (!((blk_bytes == 
65536) && (vmpg_bytes == 4096))) { - dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML_ASSERT(0); } } @@ -2439,25 +2382,25 @@ static void calculate_mcache_row_bytes( *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); - dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); - dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); - dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); - dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); - dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); - dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = 
%f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); - dml2_printf("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); - dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); + DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); #endif - DML2_ASSERT(*p->num_mcaches > 0); + DML_ASSERT(*p->num_mcaches > 0); } static void calculate_mcache_setting( @@ -2523,7 +2466,7 @@ static void calculate_mcache_setting( l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; calculate_mcache_row_bytes(scratch, &l->l_p); - DML2_ASSERT(*p->num_mcaches_l > 0); + DML_ASSERT(*p->num_mcaches_l > 0); if (l->is_dual_plane) { l->c_p.num_chans = p->num_chans; @@ -2559,7 +2502,7 @@ static void calculate_mcache_setting( l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; calculate_mcache_row_bytes(scratch, &l->c_p); - DML2_ASSERT(*p->num_mcaches_c > 0); + DML_ASSERT(*p->num_mcaches_c > 0); } // Sharing for iMALL access @@ -2598,28 +2541,28 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); - dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); - dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); - dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); - dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); - dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); - dml2_printf("DML::%s: 
num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); - dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); - dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); - dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); + DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); + DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); + DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); + DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); + DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); if (l->is_dual_plane) { - dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); - dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); - dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); - dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); - dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); - dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); - dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); - dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); - dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); - 
dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); + DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); + DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); + DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); + DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); + DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); } #endif // calculate split_coordinate @@ -2639,11 +2582,11 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); if (l->is_dual_plane) { for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); } #endif @@ -2660,10 +2603,10 @@ static void calculate_mcache_setting( #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", 
__func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); #endif } @@ -2694,8 +2637,8 @@ static void calculate_mall_bw_overhead_factor( mall_prefetch_dram_overhead_factor[k] = 2.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); - dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); #endif } } @@ -2772,22 +2715,20 @@ static double dml_get_return_bandwidth_available( else // dml2_core_internal_bw_dram return_bw_mbps = derate_dram_bandwidth; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); - dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); - dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); - dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); - dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); - dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); - dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); - dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); - dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, 
derate_fabric_factor); - dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); - dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); -#endif + DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); + DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); + DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); + DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); + DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); + DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); + DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); + DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); return return_bw_mbps; } @@ -2807,9 +2748,9 @@ static noinline_for_stack void calculate_bandwidth_available( { unsigned int n, m; - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + 
DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); // Calculate all the bandwidth availabe for (m = 0; m < dml2_core_internal_soc_state_max; m++) { @@ -2828,8 +2769,8 @@ static noinline_for_stack void calculate_bandwidth_available( #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); - dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); #endif // urg_bandwidth_available_vm_only is indexed by soc_state @@ -2843,9 +2784,9 @@ static noinline_for_stack void calculate_bandwidth_available( urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: 
urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); #endif } } @@ -2879,13 +2820,13 @@ static void calculate_avg_bandwidth_required( // SysActive and SVP Prefetch AVG bandwidth Check for (k = 0; k < num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: plane %0d\n", __func__, k); - dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); - dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); #endif sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; @@ -2902,10 +2843,10 @@ static void 
calculate_avg_bandwidth_required( avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), 
dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); #endif } } @@ -3080,10 +3021,10 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->MaxNumSwathY[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); - dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); #endif p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; 
@@ -3091,8 +3032,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); #endif if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { p->PTEBufferSizeNotExceeded[k] = true; @@ -3104,18 +3045,18 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); #ifdef __DML_VBA_DEBUG__ if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, 
PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); } #endif } @@ -3146,8 +3087,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, for (unsigned int k = 
0; k < p->NumberOfActiveSurfaces; ++k) { p->DCCMetaBufferSizeNotExceeded[k] = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); #endif p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); @@ -3170,9 +3111,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->DCCMetaBufferSizeNotExceeded[k] = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); #endif } @@ -3209,20 +3150,20 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->dpte_row_bw[k], &p->meta_row_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, 
p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); - dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); - dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); - dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, 
k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); #endif } } @@ -3257,19 +3198,19 @@ static double CalculateUrgentLatency( } #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); } else { - dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); - dml2_printf("DML::%s: 
UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); - dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); } - dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); #endif return urgent_latency; } @@ -3296,18 +3237,18 @@ static double CalculateTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); - dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); - dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: FabricClock = 
%f\n", __func__, FabricClock); - dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); - dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); #endif @@ -3334,14 +3275,14 @@ static double CalculateMetaTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); - dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); 
+ DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); #endif @@ -3358,7 +3299,6 @@ static void calculate_cursor_req_attributes( unsigned int *cursor_bytes_per_chunk, unsigned int *cursor_bytes) { - unsigned int cursor_pitch = 0; unsigned int cursor_bytes_per_req = 0; unsigned int cursor_width_bytes = 0; unsigned int cursor_height = 0; @@ -3366,10 +3306,6 @@ static void calculate_cursor_req_attributes( //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width - if (cursor_bpp == 2) - cursor_pitch = 256; - else - cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1); //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. 
@@ -3409,8 +3345,8 @@ static void calculate_cursor_req_attributes( *cursor_lines_per_chunk = 1; } else { if (cursor_width > 0) { - dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); - DML2_ASSERT(0); + DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + DML_ASSERT(0); } } @@ -3421,15 +3357,15 @@ static void calculate_cursor_req_attributes( cursor_height = cursor_width; *cursor_bytes = *cursor_bytes_per_line * cursor_height; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); - dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width); - dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); - dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); - dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); - dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch); + DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); + DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 
256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); #endif } @@ -3460,13 +3396,13 @@ static void calculate_cursor_urgent_burst_factor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); - dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); - dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); - dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3501,15 +3437,15 @@ static void CalculateUrgentBurstFactor( *UrgentBurstFactorChroma = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); - dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC); - dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); - dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); - dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); - 
dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); #endif - DML2_ASSERT(VRatio > 0); + DML_ASSERT(VRatio > 0); LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; @@ -3534,12 +3470,12 @@ static void CalculateUrgentBurstFactor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); - dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); - dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", 
__func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3600,10 +3536,10 @@ static void CalculateDCFCLKDeepSleepTdlut( if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; - dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); - dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); - dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); - dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); // increase the deepsleep dcfclk to match the original dispclk throughput rate if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { @@ -3613,8 +3549,8 @@ static void CalculateDCFCLKDeepSleepTdlut( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); - dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); #endif } @@ -3625,17 +3561,17 @@ static void CalculateDCFCLKDeepSleepTdlut( *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / 
(double)ReturnBusWidth); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); - dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); - dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); } - dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); } static noinline_for_stack void CalculateDCFCLKDeepSleep( @@ -3731,12 +3667,12 @@ static unsigned int CalculateMaxVStartup( else max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom); - dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); - dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us); - dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); - dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); - dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); + DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); + DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + DML_LOG_VERBOSE("DML::%s: line_time_us = 
%f\n", __func__, line_time_us); + DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); #endif max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); return max_vstartup_lines; @@ -3761,9 +3697,9 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch const long MAXIMUMCOMPRESSION = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); } #endif CalculateSwathWidth( @@ -3797,15 +3733,15 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, 
p->BytePerPixDETC[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); #endif if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); @@ -3848,11 +3784,11 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + 
DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); #endif *p->ViewportSizeSupport = true; @@ -3860,7 +3796,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); #endif if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { p->SwathHeightY[k] = MaximumSwathHeightY[k]; @@ -3917,13 +3853,13 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { *p->ViewportSizeSupport = false; - dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); - dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, 
p->MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); p->ViewportSizeSupportPerSurface[k] = false; } else { p->ViewportSizeSupportPerSurface[k] = true; @@ -3931,35 +3867,35 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if (p->SwathHeightC[k] == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; p->DETBufferSizeC[k] = 0; } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; } else { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, 2/3 DET will be used for 
plane0, and 1/3 for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); - dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = 
%u\n", __func__, k, p->DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); #endif } @@ -3969,12 +3905,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); #endif *p->hw_debug5 = false; @@ -3989,12 +3925,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u 
MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } #endif @@ -4192,15 +4128,15 @@ static noinline_for_stack void CalculateODMMode( SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse); - dml2_printf("DML::%s: Output = %d\n", __func__, Output); - dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); - dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); - dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); - 
dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); + DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); + DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); #endif if (ODMUse == dml2_odm_mode_auto) DecidedODMMode = DecideODMMode(HActive, @@ -4245,10 +4181,10 @@ static noinline_for_stack void CalculateODMMode( *NumberOfDPP = NumberOfDPPRequired; *RequiredDISPCLKPerSurface = success ? 
DISPCLKRequired : 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode); - dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); - dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); - dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); + DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); #endif } @@ -4292,17 +4228,17 @@ static noinline_for_stack void CalculateOutputLink( *OutputRate = dml2_core_internal_output_rate_unknown; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); - dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); - dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); - dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); - dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output); - dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, 
AudioSampleRate); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); #endif { if (Output == dml2_hdmi) { @@ -4487,9 +4423,9 @@ static noinline_for_stack void CalculateOutputLink( } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); - dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); - dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp); + DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); #endif } @@ -4571,17 +4507,17 @@ static unsigned int DSCDelayRequirement( DSCDelayRequirement_val = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode); - dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); - dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); - dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); - dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, 
DSCDelayRequirement_val); + DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); + DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); #endif return DSCDelayRequirement_val; @@ -4654,10 +4590,10 @@ static void CalculateSurfaceSizeInMall( (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); - dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); + DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); #endif } @@ -4674,7 +4610,6 @@ static void calculate_tdlut_setting( unsigned int tdlut_vmpg_per_frame; unsigned int tdlut_pte_req_per_frame; unsigned 
int tdlut_bytes_per_line; - unsigned int tdlut_delivery_cycles; double tdlut_drain_rate; unsigned int tdlut_mpc_width; unsigned int tdlut_bytes_per_group_simple; @@ -4737,13 +4672,13 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; } @@ -4756,25 +4691,25 @@ static void calculate_tdlut_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); - dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); - dml2_printf("DML::%s: 
dispclk_mhz = %f\n", __func__, p->dispclk_mhz); - dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); - dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); - dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); - dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); - dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); - dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); - dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); - dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); - dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); - dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); + DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? 
"sw_linear" : "simple_linear"); + DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); + DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); #endif } @@ -4820,10 +4755,10 @@ static void CalculateTarb( *Tarb = extra_bytes / ReturnBW; *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); - dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); - dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); - dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); + DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, 
extra_bytes_prefetch); #endif } @@ -4838,10 +4773,10 @@ static double CalculateTWait( TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip); - dml2_printf("DML::%s: TWait = %f\n", __func__, TWait); + DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); #endif return TWait; } @@ -4887,20 +4822,20 @@ static void CalculateVUpdateAndDynamicMetadataParameters( *Tdmsks = *Tdmsks / 2; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); - dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); - dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); - dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); + DML_LOG_VERBOSE("DML::%s: 
DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); - dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); - dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); - dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); - dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); #endif } @@ -4995,12 +4930,12 @@ static double get_urgent_bandwidth_required( surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); - dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); - dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); - dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); - dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); - dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max4: 
active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); #endif } else { surface_required_bw[k] = 0.0; @@ -5009,34 +4944,34 @@ static double get_urgent_bandwidth_required( l->required_bandwidth_mbps += surface_required_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); - dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); - dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); - dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); - dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); - dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); - dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); - dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); - dml2_printf("DML::%s: k=%d, 
adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); - dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); - dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); - dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); - dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); - dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); - dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, 
PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, 
inc_flip_bw, is_phantom, exclude_this_plane); #endif } @@ -5120,19 +5055,19 @@ static void CalculateExtraLatency( *ExtraLatency_sr = *ExtraLatency_sr + Tarb; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); - dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); - dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); - dml2_printf("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); - dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); - dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); - dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); - dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); - dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); - dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); - dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); - dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); - dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); + DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); + DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); + DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); + DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + DML_LOG_VERBOSE("DML::%s: 
ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); #endif } @@ -5199,20 +5134,20 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->HostVMDynamicLevelsTrips = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); - dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); - dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); - dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); + 
DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); #endif CalculateVUpdateAndDynamicMetadataParameters( p->MaxInterDCNTileRepeaters, @@ -5258,11 +5193,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { *p->NotEnoughTimeForDynamicMetadata = true; - dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); } else { 
*p->NotEnoughTimeForDynamicMetadata = false; } @@ -5288,21 +5223,21 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); - dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); - dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); - dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); - dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); - dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); - 
dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); #endif if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) @@ -5314,17 +5249,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + 
DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); #endif if (p->display_cfg->gpuvm_enable) { s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; @@ -5402,7 +5337,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } /* oto prefetch bw should be always be less than total vactive bw */ - //DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; @@ -5421,9 +5356,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); - dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); - dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); #endif if (p->display_cfg->gpuvm_enable == true) { @@ -5433,9 +5368,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_oto 
max0 = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { s->Tvm_oto = s->Tvm_trips_rounded; @@ -5447,9 +5382,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif } else s->Tr0_oto = s->LineTime / 4.0; @@ -5459,11 +5394,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; #ifdef DML_GLOBAL_PREFETCH_CHECK - dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); + DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); if (p->impacted_dst_y_pre > 0) { - dml2_printf("DML::%s: 
dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); } #endif *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; @@ -5492,72 +5427,71 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); - 
dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); - dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); - dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); - dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); - dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); - dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); - dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); - dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); - dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); -#endif - double Tpre = s->dst_y_prefetch_equ * s->LineTime; + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", 
__func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); + DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + DML_LOG_VERBOSE("DML::%s: 
Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); - dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); - dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); - dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: Tdmdl_vm: %fus - time 
for vm stages of dmd \n", __func__, *p->Tdmdl_vm); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); - dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); - dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + 
DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); 
#endif *p->dst_y_per_vm_vblank = 0; @@ -5596,19 +5530,19 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw1 = 0; - dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); - dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); + DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); + DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + 
DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } @@ -5620,10 +5554,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw2 = 0; - dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); - dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); } // prefetch_bw3: 2*R0 + SW @@ -5634,10 +5568,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw3 = 0; - dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); - dml2_printf("DML::%s: prefetch_bw3: %f 
(updated)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } // prefetch_bw4: SW @@ -5647,17 +5581,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw4 = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); - dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); - dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); - dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); - dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); - dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); - dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); - dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); - dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); + DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + 
DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); + DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); #endif { bool Case1OK = false; @@ -5676,14 +5610,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); - dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); if (s->prefetch_bw1 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; double row_transfer_time = total_row_bytes / s->prefetch_bw1; - dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } @@ -5696,8 +5630,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw2 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; double row_transfer_time = total_row_bytes / 
s->prefetch_bw2; - dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } @@ -5709,8 +5643,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw3 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; double row_transfer_time = total_row_bytes / s->prefetch_bw3; - dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } @@ -5730,10 +5664,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); - dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); - dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK); - dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK); + DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK); + DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", 
__func__, Case3OK); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); #endif if (s->prefetch_bw_equ > 0) { @@ -5753,12 +5687,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { s->Tvm_equ = 0; s->Tr0_equ = 0; - dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); - dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); + DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { @@ -5769,7 +5703,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif } else { @@ -5785,7 +5719,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } @@ -5797,31 +5731,31 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); - 
dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); - dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", 
__func__, p->cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); #endif - DML2_ASSERT(*p->dst_y_prefetch < 64); + DML_ASSERT(*p->dst_y_prefetch < 64); unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); - dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); #endif if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { @@ -5829,13 +5763,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); #endif } @@ -5843,22 +5777,22 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); - dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); #endif if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); #endif } @@ -5866,36 +5800,34 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); #endif } else { s->NoTimeToPrefetch = true; - 
dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); - dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); - dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); - dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); - dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); - dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); - dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); - dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to 
request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); - dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n", - __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5916,18 +5848,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_vm_bw = 0; } else if (*p->dst_y_per_vm_vblank > 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); + DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5936,14 +5868,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); #endif } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5963,12 +5895,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_vmrow_bw = 0; } - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); - dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); return s->NoTimeToPrefetch; } @@ -6005,7 +5937,7 @@ static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned } } if (max_idx <= 0) { - DML2_ASSERT(max_idx >= 0); + DML_ASSERT(max_idx >= 0); max_idx = this_plane_idx; } @@ -6037,12 +5969,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core // worst case if the rob and cdb is fully hogged s->max_Trpd_dcfclk_cycles = (unsigned int) 
math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); - dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); - dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); - dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); - dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); #endif // calculate the return impact from each plane, request is 256B per dcfclk @@ -6063,12 +5995,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); - dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); - dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); - dml2_printf("DML::%s: 
i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); #endif if (s->src_swath_bytes_c[i] > 0) { // dual_plane @@ -6079,10 +6011,10 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); - dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); - dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); #endif } @@ -6090,9 +6022,9 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core 
s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); - dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); - dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); + DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); #endif // clamping to worst case delay which is one which occupy the full rob+cdb if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) @@ -6109,7 +6041,7 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); + DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); #endif } @@ -6120,8 +6052,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core *p->recalc_prefetch_schedule = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); - dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); #endif } } else { @@ -6131,8 
+6063,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); - dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); + DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); + DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); #endif return s->prefetch_global_check_passed; @@ -6150,8 +6082,8 @@ static void calculate_peak_bandwidth_required( memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); #endif for (unsigned int k = 0; k < p->num_active_planes; ++k) { @@ -6347,12 +6279,12 @@ static void calculate_peak_bandwidth_required( p->surface_peak_required_bw[m][n]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); #endif - DML2_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); + DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); } } } @@ -6414,18 +6346,18 @@ static void check_urgent_bandwidth_support( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); - dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f 
%s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); } @@ -6446,14 +6378,14 @@ static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); - dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); - dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, 
urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); #endif return flip_bw_available_mbps; @@ -6478,28 +6410,28 @@ static void calculate_immediate_flip_bandwidth_support( *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); - dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); #endif - 
DML2_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); } *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); - dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? 
"<" : ">=", urg_bandwidth_required_flip[m][n]); } @@ -6549,27 +6481,27 @@ static void CalculateFlipSchedule( l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); - dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); - dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); - dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); - dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); - dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); - dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); - dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); - dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); - dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); - dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + 
DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); #endif if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { @@ -6596,9 +6528,9 @@ static void CalculateFlipSchedule( l->min_row_time = l->min_row_height * LineTime / VRatio; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); #endif - DML2_ASSERT(l->min_row_time > 0); + 
DML_ASSERT(l->min_row_time > 0); if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time @@ -6619,20 +6551,20 @@ static void CalculateFlipSchedule( l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); - dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); - dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); - dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); - dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); + DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, 
l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); if (l->lb_flip_bw > 0) { - dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); - dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); - dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); - dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); - dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6640,8 +6572,8 @@ static void CalculateFlipSchedule( (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); - dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, 
(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #endif } @@ -6653,13 +6585,12 @@ static void CalculateFlipSchedule( } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) - double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); - dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion); + DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); #endif if (l->ImmediateFlipBW == 0) { l->Tvm_flip = 0; @@ -6674,11 +6605,11 @@ static void CalculateFlipSchedule( LineTime / 4.0); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes 
* HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); - dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); #endif *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; @@ -6711,14 +6642,14 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ if (!use_lb_flip_bw) { - dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); - dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); - dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); - dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); - dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", 
__func__, l->Tvm_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } - dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); - dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); + DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); #endif } @@ -6736,7 +6667,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); #endif p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; @@ -6755,20 +6686,20 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); - dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); - dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); - dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); - dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); - dml2_printf("DML::%s: UrgentWatermark = 
%f\n", __func__, p->Watermark->UrgentWatermark); - dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); - dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); - dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); - dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); - dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + 
DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); #endif s->TotalActiveWriteback = 0; @@ -6801,11 +6732,11 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); - dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); - dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); - dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); - dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); + DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); #endif s->TotalPixelBW = 0.0; @@ -6836,11 +6767,11 @@ static void 
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); - dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); - dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); - dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); - dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); + DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); #endif s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); @@ -6943,16 +6874,16 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); - dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); - 
dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); - dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); - dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); - dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); #endif p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; @@ -6967,10 +6898,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); - dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); - dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); - dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", 
__func__, k, p->meta_row_height_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); #endif } } @@ -6992,10 +6923,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); - dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); - dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); - dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); + DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); + DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); #endif } @@ -7141,7 +7072,7 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -7153,10 +7084,8 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc break; } } -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: 
index = %d\n", __func__, index); -#endif + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); return index; } @@ -7166,7 +7095,7 @@ static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, c bool clk_entry_found = 0; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { clk_entry_found = 1; @@ -7175,10 +7104,10 @@ static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, c } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } @@ -7218,10 +7147,10 @@ static void calculate_hostvm_inefficiency_factor( if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) *HostVMInefficiencyFactorPrefetch = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); - dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); - dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, 
urg_bandwidth_avail_active_pixel_and_vm); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); #endif } } @@ -7341,10 +7270,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; -#if defined(__DML_VBA_DEBUG__) - double old_ReadBandwidthLuma; - double old_ReadBandwidthChroma; -#endif double outstanding_latency_us = 0; double min_return_bw_for_latency; @@ -7384,25 +7309,25 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: --- START --- \n", __func__); - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); - dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); - dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, 
mode_lib->ms.max_dispclk_freq_mhz); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); - dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); - dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); - dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); + DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = 
%u\n", __func__, mode_lib->ip.dcn_mrq_present); for (k = 0; k < mode_lib->ms.num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -7504,12 +7429,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); #ifdef __DML_VBA_DEBUG__ - old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; - dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); - dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, 
mode_lib->ms.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); #endif } @@ -7629,13 +7552,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, 
mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); } /* Cursor Support Check */ @@ -7672,11 +7595,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { mode_lib->ms.support.PitchSupport = false; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); - dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); - dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); - dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); - dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, 
mode_lib->ms.support.PitchSupport); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); #endif } @@ -7708,11 +7631,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); - dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: 
k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif } if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { @@ -7894,8 +7817,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); #endif // ensure the number dsc slices is integer multiple based on ODM mode @@ -7911,9 +7834,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { - dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); - dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); + DML_LOG_VERBOSE("DML::%s: k=%d 
ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); } #endif } else { @@ -7958,7 +7881,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 1; if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { - dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); } } else { if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { @@ -7968,7 +7891,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); #endif } @@ -8167,7 +8090,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->DSCFormatFactor = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); #endif if (mode_lib->ms.RequiresDSC[k] == true) { s->PixelClockBackEndFactor = 3.0; @@ -8185,10 +8108,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); - dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); - dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); - dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", 
__func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); #endif } } @@ -8423,13 +8346,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); - dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); #endif /* VActive bytes to fetch for UCLK P-State */ @@ -8531,9 +8454,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; 
#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); #endif CalculateUrgentBurstFactor( @@ -8605,7 +8528,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); #endif /* Immediate Flip and MALL parameters */ @@ -8654,16 +8577,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); - dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); - dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); - dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); - dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); - dml2_printf("DML::%s: 
in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8703,10 +8625,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); - dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); - 
dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); #endif } @@ -8722,8 +8644,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); #endif } } @@ -8869,7 +8791,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; - dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); } } @@ -8878,7 +8800,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if 
(!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { mode_lib->ms.support.AvgBandwidthSupport = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); #endif } } @@ -9029,8 +8951,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); - dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); #endif CalculatePrefetchSchedule_params->display_cfg = display_cfg; CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; @@ -9120,8 +9042,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; - dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); - dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, 
dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); } // for k num_planes CalculateDCFCLKDeepSleepTdlut( @@ -9152,11 +9074,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out || mode_lib->ms.NoTimeForPrefetch[k] == true || s->DSTYAfterScaler[k] > 8) { mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); - dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); - dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -9172,9 +9094,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; - dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d 
VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } @@ -9188,9 +9110,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; k++) { // Calculate Urgent burst factor for prefetch #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); - dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); #endif CalculateUrgentBurstFactor( &display_cfg->plane_descriptors[k], @@ -9274,12 +9196,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.urg_bandwidth_available); mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; - dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); for 
(k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); } } @@ -9509,7 +9431,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; @@ -9546,8 +9468,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); - dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); + DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ @@ -9597,17 +9519,17 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMALLSize && mode_lib->ms.support.g6_temp_read_support && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { - dml2_printf("DML::%s: mode is supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__); mode_lib->ms.support.ModeSupport = true; } else { - 
dml2_printf("DML::%s: mode is NOT supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__); mode_lib->ms.support.ModeSupport = false; } } // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). - dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); - dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; @@ -9623,8 +9545,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); - dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); #endif } @@ -9632,7 +9554,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!mode_lib->ms.support.ModeSupport) dml2_print_mode_support_info(&mode_lib->ms.support, true); - dml2_printf("DML::%s: --- DONE --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__); #endif return mode_lib->ms.support.ModeSupport; @@ -9642,18 +9564,18 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support { unsigned int result; - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: 
------------- START ----------\n", __func__); result = dml_core_mode_support(in_out_params); if (result) *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; - dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); return result; } @@ -9687,19 +9609,19 @@ static void CalculatePixelDeliveryTimes( double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); - dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); - dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); - dml2_printf("DML::%s: k=%u : 
swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); - dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); - dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); - dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, 
k, pixel_clock_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); #endif if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; @@ -9733,10 +9655,10 @@ static void CalculatePixelDeliveryTimes( } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); #endif } @@ -9752,12 +9674,12 @@ static void CalculatePixelDeliveryTimes( DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 
__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); #endif } } @@ -9853,14 +9775,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); - 
dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); #endif } @@ -9881,7 +9803,7 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE else p->time_per_tdlut_group[k] = 0; - dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); if (p->display_cfg->gpuvm_enable == true) { if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -9897,14 +9819,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } - 
dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); - dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * 
p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; @@ -9928,9 +9850,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; @@ -9945,17 +9867,17 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE p->time_per_pte_group_flip_chroma[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, 
dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); #endif } } // 
CalculateMetaAndPTETimes @@ -9991,18 +9913,18 @@ static void CalculateVMGroupAndRequestTimes( double line_time; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); - dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); #endif if (display_cfg->gpuvm_enable) { @@ -10071,13 +9993,13 @@ 
static void CalculateVMGroupAndRequestTimes( else TimePerVMRequestFlip[k] = 0.0; - dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); - dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); if (display_cfg->gpuvm_max_page_table_levels > 2) { TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; @@ -10094,10 +10016,10 @@ static void CalculateVMGroupAndRequestTimes( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); - 
dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); #endif } } @@ -10113,7 +10035,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc unsigned int SingleVTotal = 0; bool SameTiming = true; bool FoundCriticalSurface = false; - double LastZ8StutterPeriod = 0; memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); @@ -10127,9 +10048,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = 
%f\n", __func__, k, l->MaximumEffectiveCompressionLuma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; @@ -10142,9 +10063,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + 
p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; @@ -10160,19 +10081,19 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); - dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); - dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); - dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); - dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: 
AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); - dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); - dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); #endif if (l->AverageDCCZeroSizeFraction == 1) { l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; @@ -10189,10 +10110,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); - dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / 
l->AverageZeroSizeCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); #endif } else { l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, @@ -10200,16 +10121,16 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? 
l->AverageDCCCompressionRate : 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); - dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); - dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); #endif *p->StutterPeriod = 0; @@ -10220,15 +10141,15 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); - dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); - dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); - dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, 
p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); #endif if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { @@ -10248,17 +10169,17 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); - dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); - dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); - dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); - dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, 
FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); #endif } } @@ -10276,14 +10197,14 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); - dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); - dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); - 
dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); #endif l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer @@ -10292,10 +10213,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 
1 : l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); - dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); #endif l->TotalActiveWriteback = 0; memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); @@ -10324,9 +10245,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc if (l->TotalActiveWriteback == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); - dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); #endif *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; @@ -10339,11 +10260,11 @@ 
static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc *p->Z8NumberOfStutterBurstsPerFrame = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif if (*p->StutterEfficiencyNotIncludingVBlank > 0) { @@ -10358,7 +10279,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { - LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; } else { @@ -10370,25 +10291,25 @@ static void CalculateStutterEfficiency(struct 
dml2_core_internal_scratch *scratc } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); - dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming); - dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); - dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); - dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); - dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); + DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); + DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); + DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); - dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -10422,7 +10343,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex double max_uclk_mhz = 0; double min_return_latency_in_DCFCLK_cycles = 0; - dml2_printf("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); memset(&mode_lib->scratch, 0, sizeof(struct 
dml2_core_internal_scratch)); memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); @@ -10444,13 +10365,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); switch (cfg_support_info->stream_support_info[stream_index].odms_used) { case (4): @@ -10476,51 +10397,51 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dppclk[k] > 0); + DML_ASSERT(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; - dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + 
DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dcfclk > 0); - DML2_ASSERT(mode_lib->mp.FabricClock > 0); - DML2_ASSERT(mode_lib->mp.dram_bw_mbps > 0); - DML2_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); - DML2_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); - DML2_ASSERT(mode_lib->mp.Dispclk > 0); - DML2_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); - DML2_ASSERT(s->SOCCLK > 0); - -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); - dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); - dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); - dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + DML_ASSERT(mode_lib->mp.Dcfclk > 0); + DML_ASSERT(mode_lib->mp.FabricClock > 0); + DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); + DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); + DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); + DML_ASSERT(mode_lib->mp.Dispclk > 0); + DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); + DML_ASSERT(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = 
%f\n", __func__, mode_lib->mp.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); for (k = 0; k < s->num_active_planes; ++k) { - dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); - } - dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); - dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); - dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); - dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { - dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, 
mode_lib->mp.pipe_plane[k]); - dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); + DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); } for (k = 0; k < s->num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -10617,8 +10538,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: 
vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); } CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; @@ -11173,8 +11094,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.WritebackDelay[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); #endif } @@ -11183,7 +11104,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); #endif if (s->num_active_planes > 1) { @@ -11219,12 +11140,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->DestinationLineTimesForPrefetchLessThan2 = false; s->VRatioPrefetchMoreThanMax = false; - dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + 
DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, @@ -11261,7 +11182,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); #endif CalculatePrefetchSchedule_params->display_cfg = display_cfg; CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; @@ -11356,7 +11277,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.impacted_prefetch_margin_us[k] = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); #endif mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; } // for k @@ -11366,9 +11287,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.NoTimeToPrefetch[k] == true || mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || mode_lib->mp.DSTYAfterScaler[k] > 8) { - dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); - dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); - dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, 
NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); mode_lib->mp.PrefetchModeSupported = false; } if (mode_lib->mp.dst_y_prefetch[k] < 2) @@ -11377,24 +11298,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; - dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); mode_lib->mp.PrefetchModeSupported = false; } } if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); - dml2_printf("DML::%s: 
DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); mode_lib->mp.PrefetchModeSupported = false; } - dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup); // Prefetch schedule OK, now check prefetch bw @@ -11422,24 +11343,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, 
mode_lib->mp.UrgentBurstFactorChromaPre[k]); - dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); - dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); - dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); - dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); - dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + 
DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); #endif } @@ -11503,11 +11424,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.PrefetchModeSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); mode_lib->mp.PrefetchModeSupported = false; } } @@ -11533,12 +11454,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k = %u\n", __func__, k); - dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); + 
DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); #endif } for (k = 0; k < s->num_active_planes; ++k) { @@ -11631,13 +11552,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.ImmediateFlipSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__); for (k = 0; k < s->num_active_planes; ++k) { if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { mode_lib->mp.ImmediateFlipSupported = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); #endif } } @@ -11650,28 +11571,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: 
HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); - dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); - dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); #endif - dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); } for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { - dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); } else { - dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); // DCC Configuration for (k = 0; k < s->num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); #endif CalculateDCCConfiguration( 
display_cfg->plane_descriptors[k].surface.dcc.enable, @@ -11780,8 +11701,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); - dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); - dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); //Display Pipeline Delivery Time in Prefetch, Groups CalculatePixelDeliveryTimes( @@ -11893,15 +11814,15 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); - dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - 
dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; @@ -11920,9 +11841,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; if (s->blank_lines_remaining < 0) { - dml2_printf("ERROR: Vstartup is larger than vblank!?\n"); + DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); s->blank_lines_remaining = 0; - DML2_ASSERT(0); + DML_ASSERT(0); } mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; @@ -11936,18 +11857,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); - dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); - dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); - dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); - dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); - dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); - 
dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); - dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); - dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); - dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); - dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); - dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + 
DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); #endif } @@ -11969,9 +11890,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); #endif } @@ -12051,28 +11972,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); - DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); #ifdef __DML_VBA_DEBUG__ - 
dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); - dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); - dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); - dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles); - dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles); - dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); - dml2_printf("DML::%s: --- END --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); #endif return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); } bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: result = %0d\n", __func__, result); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: result = %0d\n", 
__func__, result); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); return result; } @@ -12130,16 +12051,16 @@ void dml2_core_calcs_get_dpte_row_height( unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1); - dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); - dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); - dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, 
mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif unsigned int dummy_integer[21]; @@ -12193,7 +12114,7 @@ void dml2_core_calcs_get_dpte_row_height( CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); + DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); #endif } @@ -12220,6 +12141,8 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -12246,11 +12169,11 @@ void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? 
dst_x_offset : 0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); - dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); - dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); - dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); - dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); #endif cursor_dlg_regs->chunk_hdl_adjust = 3; @@ -12286,7 +12209,7 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, double stored_swath_c_bytes; bool is_phantom_pipe; - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); @@ -12329,19 +12252,19 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); #endif - DML2_ASSERT(p0_pte_row_height_linear >= 8); + 
DML_ASSERT(p0_pte_row_height_linear >= 8); rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; if (dual_plane) { unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); #endif if (sw_mode == dml2_sw_linear) { - DML2_ASSERT(p1_pte_row_height_linear >= 8); + DML_ASSERT(p1_pte_row_height_linear >= 8); } rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; } @@ -12375,12 +12298,12 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); #endif } else { detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); #endif } } @@ -12388,15 +12311,15 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, rq_regs->plane1_base_address = detile_buf_plane1_addr; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); - dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); - 
dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); - dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); - dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); + DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); #endif - //dml2_printf_rq_regs_st(rq_regs); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + //DML_LOG_VERBOSE_rq_regs_st(rq_regs); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } static void rq_dlg_get_dlg_reg( @@ -12411,10 +12334,10 @@ static void rq_dlg_get_dlg_reg( memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); - dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - DML2_ASSERT(l->plane_idx < DML2_MAX_PLANES); + DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); l->source_format = dml2_444_8; l->odm_mode = dml2_odm_mode_bypass; @@ -12444,18 +12367,18 @@ static void rq_dlg_get_dlg_reg( l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; - dml2_printf("DML_DLG::%s: plane_idx = 
%d\n", __func__, l->plane_idx); - dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); - dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); - dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz); - dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); - DML2_ASSERT(l->refclk_freq_in_mhz != 0); - DML2_ASSERT(l->pclk_freq_in_mhz != 0); - DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0); + DML_ASSERT(l->refclk_freq_in_mhz != 0); + DML_ASSERT(l->pclk_freq_in_mhz != 0); + DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); // Need to figure out which side of odm combine we're in // Assume the pipe instance under the same plane is in order @@ -12484,14 +12407,14 @@ static void rq_dlg_get_dlg_reg( l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. 
plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); - dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); - dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); - dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); } - dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); - DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); @@ -12500,20 +12423,20 @@ static void rq_dlg_get_dlg_reg( l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); - dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); - 
dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; - dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); - dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; @@ -12521,28 +12444,28 @@ static void rq_dlg_get_dlg_reg( l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); - dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: 
dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); - dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { - DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); } l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); - dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); // Active l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", 
__func__, l->refcyc_per_line_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); l->refcyc_per_line_delivery_pre_c = 0.0; l->refcyc_per_line_delivery_c = 0.0; @@ -12551,8 +12474,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); } disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12561,8 +12484,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + 
DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); l->refcyc_per_req_delivery_pre_c = 0.0; l->refcyc_per_req_delivery_c = 0.0; @@ -12570,16 +12493,16 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); } // TTU - Cursor - DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); // Assign to register structures disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); - DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk @@ -12592,10 +12515,10 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", 
__func__, disp_dlg_regs->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12662,11 +12585,11 @@ static void rq_dlg_get_dlg_reg( disp_ttu_regs->qos_ramp_disable_c = 0; disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); - // CHECK for HW registers' range, DML2_ASSERT or clamp - DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + // CHECK for HW registers' range, DML_ASSERT or clamp + DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + 
DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); @@ -12680,16 +12603,16 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); - DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); - DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; } if (l->dual_plane) { if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; } } @@ -12700,20 +12623,20 @@ static void rq_dlg_get_dlg_reg( if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); } - 
DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); if (l->dual_plane) { - DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); } - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } } @@ -12736,11 +12659,11 @@ static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, co arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); 
#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); - dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); - dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); - dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); #endif } @@ -13013,10 +12936,10 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); - dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); - dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); - dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us); + DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, 
mode_lib->ip.subvp_fw_processing_delay_us); + DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 4e502f0a6d207..bdee6ad7bc59c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -1078,6 +1078,8 @@ struct dml2_core_calcs_mode_programming_locals { enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; unsigned int lb_source_lines_l[DML2_MAX_PLANES]; unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; }; struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 2504d9c2ec349..5a4d331047e3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -82,7 +82,7 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -145,7 +145,7 @@ bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -208,7 +208,7 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class 
source_format) val = 1; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -216,104 +216,104 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || 
support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", 
support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: 
support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: 
NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", 
support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + 
DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) @@ -358,9 +358,9 @@ void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_di out_bpp[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); #endif 
} } @@ -391,7 +391,7 @@ unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); #endif return num_active_pipes; } @@ -452,7 +452,7 @@ unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; }; } @@ -498,8 +498,8 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -511,7 +511,7 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -524,8 +524,8 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, index); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); #endif return index; } @@ -536,7 +536,7 @@ unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_fr bool clk_entry_found = 0; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: 
clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { clk_entry_found = 1; @@ -545,10 +545,10 @@ unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_fr } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 15507926f3a41..f486b090bbfc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -754,6 +754,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * 
refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -768,6 +770,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index f50662b832966..d88b3e0082dd4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -659,7 +659,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) { switch (i) { case 1: - DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -670,7 +670,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 2: - DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -681,7 +681,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 3: - DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -692,7 +692,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 4: - DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c index dc2ce5e77f579..4a7c4c62111e7 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -761,7 +761,7 @@ bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache total_mcaches_required--; } } - dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { result = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c deleted file mode 100644 index c506667897c4c..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_debug.h" - -int dml2_log_internal(const char *format, ...) -{ - return 0; -} - -int dml2_printf(const char *format, ...) -{ -#ifdef _DEBUG -#ifdef _DEBUG_PRINTS - int result; - va_list args; - va_start(args, format); - - result = vprintf(format, args); - - va_end(args); - - return result; -#else - return 0; -#endif -#else - return 0; -#endif -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index bfe6f236d2e44..b226225103c34 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -5,55 +5,62 @@ #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ -#ifndef DML2_ASSERT -#define DML2_ASSERT(condition) ((void)0) -#endif +#include "os_types.h" +#define DML_ASSERT(condition) ASSERT(condition) +#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN +#define DML_LOG_INTERNAL(fmt, ...) 
dm_output_to_console(fmt, ## __VA_ARGS__) -/* - * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart. - * DML_LOG_ERROR - unexpected but recoverable failures inside DML - * DML_LOG_WARN - unexpected inputs or events to DML - * DML_LOG_INFO - high level tracing of DML interfaces - * DML_LOG_DEBUG - detailed tracing of DML internal components - * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure - */ -#if !defined(DML_LOG_LEVEL) -#if defined(_DEBUG) && defined(_DEBUG_PRINTS) -/* for backward compatibility with old macros */ -#define DML_LOG_LEVEL 5 -#else -#define DML_LOG_LEVEL 0 -#endif -#endif +/* ASSERT with message output */ +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("DML ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) + +/* fatal errors for unrecoverable DML states until a full reset */ +#define DML_LOG_LEVEL_FATAL 0 +/* unexpected but recoverable failures inside DML */ +#define DML_LOG_LEVEL_ERROR 1 +/* unexpected inputs or events to DML */ +#define DML_LOG_LEVEL_WARN 2 +/* high level tracing of DML interfaces */ +#define DML_LOG_LEVEL_INFO 3 +/* detailed tracing of DML internal components */ +#define DML_LOG_LEVEL_DEBUG 4 +/* detailed tracing of DML calculation procedure */ +#define DML_LOG_LEVEL_VERBOSE 5 -#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) -#if DML_LOG_LEVEL >= 1 -#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#ifndef DML_LOG_LEVEL +#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT +#endif /* #ifndef DML_LOG_LEVEL */ + +#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR +#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) #else #define DML_LOG_ERROR(fmt, ...) 
((void)0) #endif -#if DML_LOG_LEVEL >= 2 -#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN +#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) #else #define DML_LOG_WARN(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 3 -#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO +#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) #else #define DML_LOG_INFO(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 4 -#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG +#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL("[DML DEBUG] "fmt, ## __VA_ARGS__) #else #define DML_LOG_DEBUG(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 5 -#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL("[DML VERBOSE] "fmt, ## __VA_ARGS__) #else #define DML_LOG_VERBOSE(fmt, ...) ((void)0) #endif - -int dml2_log_internal(const char *format, ...); -int dml2_printf(const char *format, ...); - -#endif +#endif /* __DML2_DEBUG_H__ */ -- GitLab From 8b8a602c985e99074fa1d5233cd224b7bcfb9df2 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Tue, 11 Mar 2025 17:16:57 +0800 Subject: [PATCH 015/899] drm/amd/display: Skip to enable dsc if it has been off [Why] DSC gets enabled when we commit a stream that needs to stay powered off. The DSC disable is then skipped when the pipe is reset in this situation, because power is already off. This may leave DSC unexpectedly enabled on the pipe for the next new stream, which doesn't support DSC. [HOW] Check the status of the DSC used on the current pipe when updating the stream. Skip the enable if it is off. Enabling DSC should happen when power is turned on. 
Reviewed-by: Wenjing Liu Signed-off-by: Paul Hsieh Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index be26c925fdfa1..e68f21fd5f0fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -84,6 +84,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; -- GitLab From 4a8396d5c208b4e3bc113d528af927f60cb8546f Mon Sep 17 00:00:00 2001 From: Chun-Liang Chang Date: Fri, 21 Feb 2025 10:36:22 -0600 Subject: [PATCH 016/899] drm/amd/display: Add Read Histogram command header [Why] Read the histogram for VariBright validation [How] Add dc/dmub functions to read histogram and ACE Reviewed-by: Jun Lei Signed-off-by: Chun-Liang Chang Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 2 + .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 
ccc154b0281c2..3b9011ef9b68a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -28,6 +28,8 @@ #include "dc.h" #include "core_types.h" #include "dmub_cmd.h" +#include "dc_dmub_srv.h" +#include "dmub/dmub_srv.h" #define TO_DMUB_ABM(abm)\ container_of(abm, struct dce_abm, base) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 1f5f4e3e49d41..5e7c698f9bfb6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4360,6 +4360,11 @@ enum dmub_cmd_abm_type { * Get the current ACE curve. */ DMUB_CMD__ABM_GET_ACE_CURVE = 10, + + /** + * Get current histogram data + */ + DMUB_CMD__ABM_GET_HISTOGRAM_DATA = 11, }; struct abm_ace_curve { @@ -4953,6 +4958,20 @@ enum dmub_abm_ace_curve_type { ABM_ACE_CURVE_TYPE__SW_IF = 1, }; +/** + * enum dmub_abm_histogram_type - Histogram type. + */ +enum dmub_abm_histogram_type { + /** + * Histogram as defined by the SW layer. + */ + ABM_HISTOGRAM_TYPE__SW = 0, + /** + * Histogram as defined by the SW to HW translation interface layer. + */ + ABM_HISTOGRAM_TYPE__SW_IF = 1, +}; + /** * Definition of a DMUB_CMD__ABM_GET_ACE_CURVE command. */ @@ -4988,6 +5007,41 @@ struct dmub_rb_cmd_abm_get_ace_curve { uint8_t pad; }; +/** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ +struct dmub_rb_cmd_abm_get_histogram { + /** + * Command header. + */ + struct dmub_cmd_header header; + + /** + * Address where Histogram should be copied. + */ + union dmub_addr dest; + + /** + * Type of Histogram being queried. + */ + enum dmub_abm_histogram_type histogram_type; + + /** + * Indirect buffer length. + */ + uint16_t bytes; + + /** + * eDP panel instance. + */ + uint8_t panel_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + /** * Definition of a DMUB_CMD__ABM_SAVE_RESTORE command. 
*/ @@ -5686,6 +5740,11 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_abm_get_ace_curve abm_get_ace_curve; + /** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ + struct dmub_rb_cmd_abm_get_histogram abm_get_histogram; + /** * Definition of a DMUB_CMD__ABM_SET_EVENT command. */ -- GitLab From d5a7fdc88a2d64242d959942cbd0e1499ebb9806 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 12 Mar 2025 17:30:25 -0400 Subject: [PATCH 017/899] drm/amd/display: fix zero value for APU watermark_c [why] the guard of is_apu not in sync, caused no watermark_c output. Reviewed-by: Ovidiu Bunea Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index f549a778f6f1a..04316d719426a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -661,7 +661,10 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s dml2_copy_clocks_to_dc_state(&out_clks, context); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); - memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); + if (context->streams[0]->sink->link->dc->caps.is_apu) + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); + else + memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); dml2_extract_writeback_wm(context, 
&dml2->v20.dml_core_ctx); //copy for deciding zstate use -- GitLab From c82d84d1e46c9dd16050a3a5cc7639ddb3a01eb8 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 13 Mar 2025 13:43:41 -0400 Subject: [PATCH 018/899] drm/amd/display: Create a temporary scratch dc_link Create a temporary scratch dc_link for programming purposes and fix a copy of pipe_ctx on the stack to a pointer reference. Reviewed-by: Josip Pavic Signed-off-by: Aric Cyr Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 329 +++++++++--------- .../gpu/drm/amd/display/dc/inc/core_types.h | 2 +- .../link/protocols/link_edp_panel_control.c | 8 +- 3 files changed, 170 insertions(+), 169 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index be2518e07c14b..13f3a83705925 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1419,6 +1419,170 @@ struct dc_scratch_space { struct dc_stream_state stream_state; }; +/* + * A link contains one or more sinks and their connected status. + * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. + */ + struct dc_link { + struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; + unsigned int sink_count; + struct dc_sink *local_sink; + unsigned int link_index; + enum dc_connection_type type; + enum signal_type connector_signal; + enum dc_irq_source irq_source_hpd; + enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ + + bool is_hpd_filter_disabled; + bool dp_ss_off; + + /** + * @link_state_valid: + * + * If there is no link and local sink, this variable should be set to + * false. Otherwise, it should be set to true; usually, the function + * core_link_enable_stream sets this field to true. + */ + bool link_state_valid; + bool aux_access_disabled; + bool sync_lt_in_progress; + bool skip_stream_reenable; + bool is_internal_display; + /** @todo Rename. 
Flag an endpoint as having a programmable mapping to a DIG encoder. */ + bool is_dig_mapping_flexible; + bool hpd_status; /* HPD status of link without physical HPD pin. */ + bool is_hpd_pending; /* Indicates a new received hpd */ + + /* USB4 DPIA links skip verifying link cap, instead performing the fallback method + * for every link training. This is incompatible with DP LL compliance automation, + * which expects the same link settings to be used every retry on a link loss. + * This flag is used to skip the fallback when link loss occurs during automation. + */ + bool skip_fallback_on_link_loss; + + bool edp_sink_present; + + struct dp_trace dp_trace; + + /* caps is the same as reported_link_cap. link_traing use + * reported_link_cap. Will clean up. TODO + */ + struct dc_link_settings reported_link_cap; + struct dc_link_settings verified_link_cap; + struct dc_link_settings cur_link_settings; + struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; + struct dc_link_settings preferred_link_setting; + /* preferred_training_settings are override values that + * come from DM. DM is responsible for the memory + * management of the override pointers. + */ + struct dc_link_training_overrides preferred_training_settings; + struct dp_audio_test_data audio_test_data; + + uint8_t ddc_hw_inst; + + uint8_t hpd_src; + + uint8_t link_enc_hw_inst; + /* DIG link encoder ID. Used as index in link encoder resource pool. + * For links with fixed mapping to DIG, this is not changed after dc_link + * object creation. + */ + enum engine_id eng_id; + enum engine_id dpia_preferred_eng_id; + + bool test_pattern_enabled; + /* Pending/Current test pattern are only used to perform and track + * FIXED_VS retimer test pattern/lane adjustment override state. + * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, + * to perform specific lane adjust overrides before setting certain + * PHY test patterns. 
In cases when lane adjust and set test pattern + * calls are not performed atomically (i.e. performing link training), + * pending_test_pattern will be invalid or contain a non-PHY test pattern + * and current_test_pattern will contain required context for any future + * set pattern/set lane adjust to transition between override state(s). + * */ + enum dp_test_pattern current_test_pattern; + enum dp_test_pattern pending_test_pattern; + + union compliance_test_state compliance_test_state; + + void *priv; + + struct ddc_service *ddc; + + enum dp_panel_mode panel_mode; + bool aux_mode; + + /* Private to DC core */ + + const struct dc *dc; + + struct dc_context *ctx; + + struct panel_cntl *panel_cntl; + struct link_encoder *link_enc; + struct graphics_object_id link_id; + /* Endpoint type distinguishes display endpoints which do not have entries + * in the BIOS connector table from those that do. Helps when tracking link + * encoder to display endpoint assignments. + */ + enum display_endpoint_type ep_type; + union ddi_channel_mapping ddi_channel_mapping; + struct connector_device_tag_info device_tag; + struct dpcd_caps dpcd_caps; + uint32_t dongle_max_pix_clk; + unsigned short chip_caps; + unsigned int dpcd_sink_count; + struct hdcp_caps hdcp_caps; + enum edp_revision edp_revision; + union dpcd_sink_ext_caps dpcd_sink_ext_caps; + + struct psr_settings psr_settings; + struct replay_settings replay_settings; + + /* Drive settings read from integrated info table */ + struct dc_lane_settings bios_forced_drive_settings; + + /* Vendor specific LTTPR workaround variables */ + uint8_t vendor_specific_lttpr_link_rate_wa; + bool apply_vendor_specific_lttpr_link_rate_wa; + + /* MST record stream using this link */ + struct link_flags { + bool dp_keep_receiver_powered; + bool dp_skip_DID2; + bool dp_skip_reset_segment; + bool dp_skip_fs_144hz; + bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. 
*/ + bool dpia_mst_dsc_always_on; + /* Forced DPIA into TBT3 compatibility mode. */ + bool dpia_forced_tbt3_mode; + bool dongle_mode_timing_override; + bool blank_stream_on_ocs_change; + bool read_dpcd204h_on_irq_hpd; + bool force_dp_ffe_preset; + } wa_flags; + union dc_dp_ffe_preset forced_dp_ffe_preset; + struct link_mst_stream_allocation_table mst_stream_alloc_table; + + struct dc_link_status link_status; + struct dprx_states dprx_states; + + struct gpio *hpd_gpio; + enum dc_link_fec_state fec_state; + bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly + + struct dc_panel_config panel_config; + struct phy_state phy_state; + uint32_t phy_transition_bitmask; + // BW ALLOCATON USB4 ONLY + struct dc_dpia_bw_alloc dpia_bw_alloc_config; + bool skip_implict_edp_power_control; + enum backlight_control_type backlight_control_type; +}; + struct dc { struct dc_debug_options debug; struct dc_versions versions; @@ -1486,6 +1650,7 @@ struct dc { struct dc_scratch_space current_state; struct dc_scratch_space new_state; struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack + struct dc_link temp_link; bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */ } scratch; @@ -1652,170 +1817,6 @@ uint32_t dc_bandwidth_in_kbps_from_timing( const enum dc_link_encoding_format link_encoding); /* Link Interfaces */ -/* - * A link contains one or more sinks and their connected status. - * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. 
- */ -struct dc_link { - struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; - unsigned int sink_count; - struct dc_sink *local_sink; - unsigned int link_index; - enum dc_connection_type type; - enum signal_type connector_signal; - enum dc_irq_source irq_source_hpd; - enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ - - bool is_hpd_filter_disabled; - bool dp_ss_off; - - /** - * @link_state_valid: - * - * If there is no link and local sink, this variable should be set to - * false. Otherwise, it should be set to true; usually, the function - * core_link_enable_stream sets this field to true. - */ - bool link_state_valid; - bool aux_access_disabled; - bool sync_lt_in_progress; - bool skip_stream_reenable; - bool is_internal_display; - /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */ - bool is_dig_mapping_flexible; - bool hpd_status; /* HPD status of link without physical HPD pin. */ - bool is_hpd_pending; /* Indicates a new received hpd */ - - /* USB4 DPIA links skip verifying link cap, instead performing the fallback method - * for every link training. This is incompatible with DP LL compliance automation, - * which expects the same link settings to be used every retry on a link loss. - * This flag is used to skip the fallback when link loss occurs during automation. - */ - bool skip_fallback_on_link_loss; - - bool edp_sink_present; - - struct dp_trace dp_trace; - - /* caps is the same as reported_link_cap. link_traing use - * reported_link_cap. Will clean up. TODO - */ - struct dc_link_settings reported_link_cap; - struct dc_link_settings verified_link_cap; - struct dc_link_settings cur_link_settings; - struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; - struct dc_link_settings preferred_link_setting; - /* preferred_training_settings are override values that - * come from DM. DM is responsible for the memory - * management of the override pointers. 
- */ - struct dc_link_training_overrides preferred_training_settings; - struct dp_audio_test_data audio_test_data; - - uint8_t ddc_hw_inst; - - uint8_t hpd_src; - - uint8_t link_enc_hw_inst; - /* DIG link encoder ID. Used as index in link encoder resource pool. - * For links with fixed mapping to DIG, this is not changed after dc_link - * object creation. - */ - enum engine_id eng_id; - enum engine_id dpia_preferred_eng_id; - - bool test_pattern_enabled; - /* Pending/Current test pattern are only used to perform and track - * FIXED_VS retimer test pattern/lane adjustment override state. - * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, - * to perform specific lane adjust overrides before setting certain - * PHY test patterns. In cases when lane adjust and set test pattern - * calls are not performed atomically (i.e. performing link training), - * pending_test_pattern will be invalid or contain a non-PHY test pattern - * and current_test_pattern will contain required context for any future - * set pattern/set lane adjust to transition between override state(s). - * */ - enum dp_test_pattern current_test_pattern; - enum dp_test_pattern pending_test_pattern; - - union compliance_test_state compliance_test_state; - - void *priv; - - struct ddc_service *ddc; - - enum dp_panel_mode panel_mode; - bool aux_mode; - - /* Private to DC core */ - - const struct dc *dc; - - struct dc_context *ctx; - - struct panel_cntl *panel_cntl; - struct link_encoder *link_enc; - struct graphics_object_id link_id; - /* Endpoint type distinguishes display endpoints which do not have entries - * in the BIOS connector table from those that do. Helps when tracking link - * encoder to display endpoint assignments. 
- */ - enum display_endpoint_type ep_type; - union ddi_channel_mapping ddi_channel_mapping; - struct connector_device_tag_info device_tag; - struct dpcd_caps dpcd_caps; - uint32_t dongle_max_pix_clk; - unsigned short chip_caps; - unsigned int dpcd_sink_count; - struct hdcp_caps hdcp_caps; - enum edp_revision edp_revision; - union dpcd_sink_ext_caps dpcd_sink_ext_caps; - - struct psr_settings psr_settings; - struct replay_settings replay_settings; - - /* Drive settings read from integrated info table */ - struct dc_lane_settings bios_forced_drive_settings; - - /* Vendor specific LTTPR workaround variables */ - uint8_t vendor_specific_lttpr_link_rate_wa; - bool apply_vendor_specific_lttpr_link_rate_wa; - - /* MST record stream using this link */ - struct link_flags { - bool dp_keep_receiver_powered; - bool dp_skip_DID2; - bool dp_skip_reset_segment; - bool dp_skip_fs_144hz; - bool dp_mot_reset_segment; - /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ - bool dpia_mst_dsc_always_on; - /* Forced DPIA into TBT3 compatibility mode. */ - bool dpia_forced_tbt3_mode; - bool dongle_mode_timing_override; - bool blank_stream_on_ocs_change; - bool read_dpcd204h_on_irq_hpd; - bool force_dp_ffe_preset; - } wa_flags; - union dc_dp_ffe_preset forced_dp_ffe_preset; - struct link_mst_stream_allocation_table mst_stream_alloc_table; - - struct dc_link_status link_status; - struct dprx_states dprx_states; - - struct gpio *hpd_gpio; - enum dc_link_fec_state fec_state; - bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly - - struct dc_panel_config panel_config; - struct phy_state phy_state; - uint32_t phy_transition_bitmask; - // BW ALLOCATON USB4 ONLY - struct dc_dpia_bw_alloc dpia_bw_alloc_config; - bool skip_implict_edp_power_control; - enum backlight_control_type backlight_control_type; -}; - /* Return an enumerated dc_link. * dc_link order is constant and determined at * boot time. 
They cannot be created or destroyed. diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d0021f25f3d8a..c1c8742d4a58d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -507,7 +507,7 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; - /* solely used for build scalar data in dml2 */ + /* used to build scalar data in dml2 and for edp backlight programming */ struct pipe_ctx temp_pipe; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 1e4adbc764ea6..669ac1bc662cd 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -524,7 +524,7 @@ bool edp_set_backlight_level(const struct dc_link *link, struct dc *dc = link->ctx->dc; uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; uint32_t frame_ramp = backlight_level_params->frame_ramp; - DC_LOGGER_INIT(link->ctx->logger); + DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); @@ -1130,11 +1130,11 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link) struct abm *abm = NULL; for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx.stream; + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; if (stream && stream->link == link) { - abm = pipe_ctx.stream_res.abm; + abm = pipe_ctx->stream_res.abm; break; } } -- GitLab From a8f83d0c2d2bd7c065df6ffa07c5feb2b560094a Mon Sep 17 00:00:00 2001 From: 
"JinZe.Xu" Date: Wed, 12 Mar 2025 18:02:16 +0800 Subject: [PATCH 019/899] drm/amd/display: Use sync version of indirect register access. [Why] Access to indirect registers by DC and other components are not synchronized. [How] Use sync version of indirect register access. Reviewed-by: Nicholas Kazlauskas Signed-off-by: JinZe.Xu Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn315/dcn315_smu.c | 29 +++++++++---------- .../gpu/drm/amd/display/dc/inc/reg_helper.h | 4 +++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 2d14346b680e3..478b4d6a35440 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -49,12 +49,9 @@ static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E0000 { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0x0241B000, 0x04040000 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } } } }; + +#define CTX clk_mgr->base.ctx +#define IND_REG(offset) offset #define regBIF_BX_PF2_RSMU_INDEX 0x0000 #define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1 @@ -67,9 +64,6 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define FN(reg_name, field) \ FD(reg_name##__##field) -#define REG_NBIO(reg_name) \ - (NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name) - #undef DC_LOGGER #define DC_LOGGER \ CTX->logger @@ -77,6 +71,13 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define mmMP1_C2PMSG_3 0x3B1050C +#define reg__MP1_C2PMSG_3_MASK 
(0xFFFFFFFF) +#define reg__MP1_C2PMSG_3__SHIFT (0) + + +#define data_reg_name__MP1_C2PMSG_3_MASK (0xFFFFFFFF) +#define data_reg_name__MP1_C2PMSG_3__SHIFT (0) + #define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team #define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0 @@ -153,12 +154,10 @@ static int dcn315_smu_send_msg_with_param( for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) { /* Trigger the message transaction by writing the message ID */ - generic_write_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3, msg_id); - read_back_data = generic_read_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3); + IX_REG_SET_SYNC(mmMP1_C2PMSG_3, 0, + MP1_C2PMSG_3, msg_id); + IX_REG_GET_SYNC(mmMP1_C2PMSG_3, + MP1_C2PMSG_3, &read_back_data); if (read_back_data == msg_id) break; udelay(2); diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index a402df225a76e..26cb1459b743f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -508,6 +508,10 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, initial_val, \ n, __VA_ARGS__) +#define IX_REG_SET_SYNC(index, init_value, f1, v1) \ + IX_REG_SET_N_SYNC(index, 1, init_value, \ + FN(reg, f1), v1) + #define IX_REG_SET_2_SYNC(index, init_value, f1, v1, f2, v2) \ IX_REG_SET_N_SYNC(index, 2, init_value, \ FN(reg, f1), v1,\ -- GitLab From eed269da71ee7e0c394646bd35c6d65e7b94019f Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 16 Mar 2025 10:46:52 -0500 Subject: [PATCH 020/899] drm/amd/display: DC v3.2.326 Summary: * DML 2.1 resync * Vblank disable fixes * Visual confirm debug improvements * Add command for reading ABM histogram * Bug fixes & improvements Signed-off-by: Aric Cyr Signed-off-by: Aurabindo Pillai 
Reviewed-by: Leo Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 13f3a83705925..0e98af9fb9e19 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.325" +#define DC_VER "3.2.326" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- GitLab From 8e0793b6c58b7a67df21e092d90d9f93f6d35404 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 20 Mar 2025 10:31:32 +0530 Subject: [PATCH 021/899] drm/amdkfd: Use dev_* instead of pr_* for messages To get the device context, replace pr_ with dev_ functions. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 142 ++++++++++++------ .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 92 ++++++++---- .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 142 ++++++++++++++---- 3 files changed, 263 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 37b69fe0ede38..54870b4c50000 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -168,11 +168,12 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev, client_id != SOC15_IH_CLIENTID_SE3SH) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -217,37 +218,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF0_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF1_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_AUTO_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF0_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF1_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, 
- SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID)); + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), @@ -259,21 +289,37 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0, ERR_TYPE); - pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, 
wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID), sq_intr_err_type); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index c5f97e6e36ff5..eb88ba8d8b012 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -148,44 +148,69 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \ KFD_CTXID0_DOORBELL_ID_MASK) -static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, 
SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + REG_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + CMD_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_CMD_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_REG_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + IMMED_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); } -static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + SH_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); } -static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0, + uint32_t 
context_id1) { - pr_warn_ratelimited( + dev_warn_ratelimited( + dev->adev->dev, "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + DETAIL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + TYPE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + WGP_ID)); } static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, @@ -255,11 +280,12 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev, (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG)) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -353,10 +379,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (sq_int_enc) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - print_sq_intr_info_auto(context_id0, context_id1); + print_sq_intr_info_auto(dev, context_id0, context_id1); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - print_sq_intr_info_inst(context_id0, context_id1); + print_sq_intr_info_inst(dev, context_id0, context_id1); sq_int_priv = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -366,7 +392,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, return; break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: - print_sq_intr_info_error(context_id0, context_id1); + print_sq_intr_info_error(dev, context_id0, context_id1); sq_int_errtype = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE); if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index b8a91bf4ef307..4ceb251312a65 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -314,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, & ~pasid_mask) | pasid); } - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -379,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + REG_TIMESTAMP), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + CMD_TIMESTAMP), + 
REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_CMD_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_REG_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + IMMED_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_int_data); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -412,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); - pr_warn_ratelimited("sq_intr: 
error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { -- GitLab From 921c040efeca35e21a836aa80fb2e3a1b8f64c97 Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Fri, 22 Nov 2024 15:19:16 +0800 Subject: [PATCH 022/899] drm/amd/pm: Add link reset for SMU 13.0.6 Add link reset implementation Signed-off-by: Ce Sun Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 28 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 26 +++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 23 +++++++++++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 22 +++++++++++++++ 5 files changed, 98 
insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 81e9b443ca0ad..9f5768fa97dd0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -329,6 +329,34 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) return ret; } +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool support_link_reset = false; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_link_reset = smu_link_reset_is_support(smu); + mutex_unlock(&adev->pm.mutex); + } + + return support_link_reset; +} + +int amdgpu_dpm_link_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = -EOPNOTSUPP; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + ret = smu_link_reset(smu); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f93d287dbf137..747019ed2bf5c 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -414,11 +414,13 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); +int amdgpu_dpm_link_reset(struct amdgpu_device *adev); int amdgpu_dpm_enable_gfx_features(struct amdgpu_device *adev); int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev); int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 033c3229b555f..a01b6244d99cd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3412,6 +3412,19 @@ bool smu_mode2_reset_is_support(struct smu_context *smu) return ret; } +bool smu_link_reset_is_support(struct smu_context *smu) +{ + bool ret = false; + + if (!smu->pm_enabled) + return false; + + if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support) + ret = smu->ppt_funcs->link_reset_is_support(smu); + + return ret; +} + int smu_mode1_reset(struct smu_context *smu) { int ret = 0; @@ -3442,6 +3455,19 @@ static int smu_mode2_reset(void *handle) return ret; } +int smu_link_reset(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->pm_enabled) + return -EOPNOTSUPP; + + if (smu->ppt_funcs->link_reset) + ret = smu->ppt_funcs->link_reset(smu); + + return ret; +} + static int smu_enable_gfx_features(void *handle) { struct smu_context *smu = handle; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3ba169639f546..27cecf9688ccb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -438,9 +438,11 @@ struct mclock_latency_table { }; enum smu_reset_mode { - SMU_RESET_MODE_0, - SMU_RESET_MODE_1, - SMU_RESET_MODE_2, + SMU_RESET_MODE_0, + SMU_RESET_MODE_1, + SMU_RESET_MODE_2, + SMU_RESET_MODE_3, + SMU_RESET_MODE_4, }; enum smu_baco_state { @@ -1228,11 +1230,17 @@ struct pptable_funcs { * @mode1_reset_is_support: Check if GPU supports mode1 reset. */ bool (*mode1_reset_is_support)(struct smu_context *smu); + /** * @mode2_reset_is_support: Check if GPU supports mode2 reset. */ bool (*mode2_reset_is_support)(struct smu_context *smu); + /** + * @link_reset_is_support: Check if GPU supports link reset. + */ + bool (*link_reset_is_support)(struct smu_context *smu); + /** * @mode1_reset: Perform mode1 reset. 
* @@ -1250,6 +1258,13 @@ struct pptable_funcs { /* for gfx feature enablement after mode2 reset */ int (*enable_gfx_features)(struct smu_context *smu); + /** + * @link_reset: Perform link reset. + * + * The gfx device driver reset + */ + int (*link_reset)(struct smu_context *smu); + /** * @get_dpm_ultimate_freq: Get the hard frequency range of a clock * domain in MHz. @@ -1601,7 +1616,9 @@ int smu_get_power_limit(void *handle, bool smu_mode1_reset_is_support(struct smu_context *smu); bool smu_mode2_reset_is_support(struct smu_context *smu); +bool smu_link_reset_is_support(struct smu_context *smu); int smu_mode1_reset(struct smu_context *smu); +int smu_link_reset(struct smu_context *smu); extern const struct amd_ip_funcs smu_ip_funcs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index c478b3be37af1..9264dc33ee7e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2844,6 +2844,15 @@ static int smu_v13_0_6_mode1_reset(struct smu_context *smu) return ret; } +static int smu_v13_0_6_link_reset(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_4, NULL); + return ret; +} + static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu) { return true; @@ -2854,6 +2863,17 @@ static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu) return true; } +static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int var = (adev->pdev->device & 0xF); + + if (var == 0x1) + return true; + + return false; +} + static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, uint32_t size) { @@ -3587,8 +3607,10 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, 
.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, + .link_reset_is_support = smu_v13_0_6_is_link_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, .mode2_reset = smu_v13_0_6_mode2_reset, + .link_reset = smu_v13_0_6_link_reset, .wait_for_event = smu_v13_0_wait_for_event, .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, -- GitLab From 11bb33766f66d0cfe997601d6d974d0158a0e7b8 Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Thu, 20 Mar 2025 18:12:40 +0800 Subject: [PATCH 023/899] drm/amdgpu: refactor amdgpu_device_gpu_recover Split amdgpu_device_gpu_recover into the following stages: halt activities, asic reset, schedule resume and amdgpu resume. The reason is that the subsequent addition of DPC recovery code will have a high similarity with GPU reset. Signed-off-by: Ce Sun Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 234 +++++++++++++-------- 2 files changed, 150 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2c04ae1338484..2db233a9f9721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -829,6 +829,10 @@ struct amdgpu_mqd { struct amdgpu_mqd_prop *p); }; +struct amdgpu_pcie_reset_ctx { + bool audio_suspended; +}; + /* * Custom Init levels could be defined for different situations where a full * initialization of all hardware blocks are not expected.
Sample cases are @@ -1159,6 +1163,8 @@ struct amdgpu_device { struct pci_saved_state *pci_state; pci_channel_state_t pci_channel_state; + struct amdgpu_pcie_reset_ctx pcie_reset_ctx; + /* Track auto wait count on s_barrier settings */ bool barrier_has_auto_waitcnt; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6ebf6179064b7..1866d07db1331 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5930,94 +5930,40 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu_device pointer - * @job: which job trigger hang - * @reset_context: amdgpu reset context pointer - * - * Attempt to reset the GPU if it has hung (all asics). - * Attempt to do soft-reset or full-reset and reinitialize Asic - * Returns 0 for success or an error on failure. - */ - -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, +static int amdgpu_device_halt_activities(struct amdgpu_device *adev, struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context) + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, + struct amdgpu_hive_info *hive, + bool need_emergency_restart) { - struct list_head device_list, *device_list_handle = NULL; - bool job_signaled = false; - struct amdgpu_hive_info *hive = NULL; + struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool need_emergency_restart = false; - bool audio_suspended = false; - int retry_limit = AMDGPU_MAX_RETRY_LIMIT; - /* - * If it reaches here because of hang/timeout and a RAS error is - * detected at the same time, let RAS recovery take care of it. 
- */ - if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && - !amdgpu_sriov_vf(adev) && - reset_context->src != AMDGPU_RESET_SRC_RAS) { - dev_dbg(adev->dev, - "Gpu recovery from source: %d yielding to RAS error recovery handling", - reset_context->src); - return 0; - } - /* - * Special case: RAS triggered and full reset isn't supported - */ - need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); - - /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (need_emergency_restart && amdgpu_ras_get_context(adev) && - amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); - - ksys_sync_helper(); - emergency_restart(); - } - - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); - - if (!amdgpu_sriov_vf(adev)) - hive = amdgpu_get_xgmi_hive(adev); - if (hive) - mutex_lock(&hive->hive_lock); - - reset_context->job = job; - reset_context->hive = hive; /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. 
*/ - INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - list_add_tail(&tmp_adev->reset_list, &device_list); + list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; } - if (!list_is_first(&adev->reset_list, &device_list)) - list_rotate_to_front(&adev->reset_list, &device_list); - device_list_handle = &device_list; + if (!list_is_first(&adev->reset_list, device_list)) + list_rotate_to_front(&adev->reset_list, device_list); + device_list_handle = device_list; } else { - list_add_tail(&adev->reset_list, &device_list); - device_list_handle = &device_list; + list_add_tail(&adev->reset_list, device_list); + device_list_handle = device_list; } if (!amdgpu_sriov_vf(adev)) { r = amdgpu_device_health_check(device_list_handle); if (r) - goto end_reset; + return r; } /* We need to lock reset domain only once both for XGMI and single device */ @@ -6041,7 +5987,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * some audio codec errors. */ if (!amdgpu_device_suspend_display_audio(tmp_adev)) - audio_suspended = true; + tmp_adev->pcie_reset_ctx.audio_suspended = true; amdgpu_ras_set_error_query_ready(tmp_adev, false); @@ -6076,23 +6022,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&tmp_adev->gpu_reset_counter); } - if (need_emergency_restart) - goto skip_sched_resume; + return r; +} - /* - * Must check guilty signal here since after this point all old - * HW fences are force signaled. 
- * - * job->base holds a reference to parent fence - */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { - job_signaled = true; - dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); - goto skip_hw_reset; - } +static int amdgpu_device_asic_reset(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + int r = 0; retry: /* Rest of adevs pre asic reset from XGMI hive. */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -6119,12 +6061,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (r) adev->asic_reset_res = r; } else { - r = amdgpu_do_asic_reset(device_list_handle, reset_context); + r = amdgpu_do_asic_reset(device_list, reset_context); if (r && r == -EAGAIN) goto retry; } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { /* * Drop any pending non scheduler resets queued before reset is done. * Any reset scheduled after this point would be valid. Scheduler resets @@ -6134,10 +6076,18 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ amdgpu_device_stop_pending_resets(tmp_adev); } -skip_hw_reset: + return r; +} + +static int amdgpu_device_sched_resume(struct list_head *device_list, + struct amdgpu_reset_context *reset_context, + bool job_signaled) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* Post ASIC reset for all devs .*/ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -6173,8 +6123,16 @@ skip_hw_reset: } } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + return r; +} + +static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, + struct list_head *device_list, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + + list_for_each_entry(tmp_adev, device_list, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); @@ -6185,18 +6143,114 @@ skip_sched_resume: if (!adev->kfd.init_complete) amdgpu_amdkfd_device_init(adev); - if (audio_suspended) + if (tmp_adev->pcie_reset_ctx.audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_ras_set_error_query_ready(tmp_adev, true); + } - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + tmp_adev = list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + + +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu_device pointer + * @job: which job trigger hang + * @reset_context: amdgpu reset context pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. 
+ */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) +{ + struct list_head device_list; + bool job_signaled = false; + struct amdgpu_hive_info *hive = NULL; + int r = 0; + bool need_emergency_restart = false; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + + /* + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } + + dev_info(adev->dev, "GPU %s begin!\n", + need_emergency_restart ? "jobs stop":"reset"); + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + + reset_context->job = job; + reset_context->hive = hive; + INIT_LIST_HEAD(&device_list); + + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); + if (r) + goto end_reset; + + if (need_emergency_restart) + goto skip_sched_resume; + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. 
+ * + * job->base holds a reference to parent fence + */ + if (job && dma_fence_is_signaled(&job->hw_fence)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + r = amdgpu_device_asic_reset(adev, &device_list, reset_context); + if (r) + goto end_reset; +skip_hw_reset: + r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); + if (r) + goto end_reset; +skip_sched_resume: + amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); -- GitLab From 8ba904f54148d2a093650b13a229d2ff18142f7d Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Fri, 21 Mar 2025 10:11:18 +0800 Subject: [PATCH 024/899] drm/amdgpu: Multi-GPU DPC recovery support Add support for DPC recover based on refactored code Signed-off-by: Ce Sun Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 172 ++++++++++++++------- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 + 3 files changed, 125 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2db233a9f9721..9a9e5249c63f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -551,6 +551,7 @@ struct amdgpu_allowed_register_entry { * are reset depends on the ASIC. Notably doesn't reset IPs * shared with the CPU on APUs or the memory controllers (so * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card * but without powering off the PCI bus. Suitable only for * discrete GPUs. 
@@ -568,6 +569,7 @@ enum amd_reset_method { AMD_RESET_METHOD_MODE0, AMD_RESET_METHOD_MODE1, AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_LINK, AMD_RESET_METHOD_BACO, AMD_RESET_METHOD_PCI, AMD_RESET_METHOD_ON_INIT, @@ -830,6 +832,8 @@ struct amdgpu_mqd { }; struct amdgpu_pcie_reset_ctx { + bool in_link_reset; + bool occurs_dpc; bool audio_suspended; }; @@ -1469,6 +1473,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int amdgpu_device_mode1_reset(struct amdgpu_device *adev); +int amdgpu_device_link_reset(struct amdgpu_device *adev); bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1866d07db1331..7525128f971f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3172,6 +3172,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. 
*/ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: return true; @@ -5510,6 +5511,29 @@ mode1_reset_failed: return ret; } +int amdgpu_device_link_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + dev_info(adev->dev, "GPU link reset\n"); + + if (!adev->pcie_reset_ctx.occurs_dpc) + ret = amdgpu_dpm_link_reset(adev); + + if (ret) + goto link_reset_failed; + + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto link_reset_failed; + + return 0; + +link_reset_failed: + dev_err(adev->dev, "GPU link reset failed\n"); + return ret; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5814,6 +5838,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: + case AMD_RESET_METHOD_LINK: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; break; case AMD_RESET_METHOD_MODE2: @@ -5951,6 +5976,8 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->pcie_reset_ctx.in_link_reset = true; } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); @@ -5960,7 +5987,7 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, device_list_handle = device_list; } - if (!amdgpu_sriov_vf(adev)) { + if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { r = amdgpu_device_health_check(device_list_handle); if (r) return r; @@ -6005,6 +6032,7 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, /* disable ras on ALL IPs */ if (!need_emergency_restart && + (!adev->pcie_reset_ctx.occurs_dpc) && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -6035,7 +6063,11 @@ static int 
amdgpu_device_asic_reset(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list, reset_list) { + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6634,12 +6666,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_reset_context reset_context; + struct list_head device_list; + int r = 0; - DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); + dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (adev->gmc.xgmi.num_physical_nodes > 1) { - DRM_WARN("No support for XGMI hive yet..."); + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, "No support for XGMI hive yet...\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -6647,32 +6682,30 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta switch (state) { case pci_channel_io_normal: + dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); return PCI_ERS_RESULT_CAN_RECOVER; - /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: - /* - * Locking adev->reset_domain->sem will prevent any external access - * to GPU during PCI error recovery - */ - amdgpu_device_lock_reset_domain(adev->reset_domain); - amdgpu_device_set_mp1_state(adev); - - /* - * Block any work scheduling as we do for regular GPU reset - * for the duration of the recovery - */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!amdgpu_ring_sched_ready(ring)) - 
continue; - - drm_sched_stop(&ring->sched, NULL); + /* Fatal error, prepare for slot reset */ + dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + + if (hive) + mutex_lock(&hive->hive_lock); + adev->pcie_reset_ctx.occurs_dpc = true; + memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + + r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - atomic_inc(&adev->gpu_reset_counter); + if (r) + return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ + dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); return PCI_ERS_RESULT_DISCONNECT; } @@ -6685,8 +6718,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); - DRM_INFO("PCI error: mmio enabled callback!!\n"); + dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); /* TODO - dump whatever for debugging purposes */ @@ -6710,10 +6745,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int r, i; struct amdgpu_reset_context reset_context; - u32 memsize; + struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_hive_info *hive = NULL; struct list_head device_list; + int r = 0, i; + u32 memsize; /* PCI error slot reset should be skipped During RAS recovery */ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || @@ -6721,15 +6758,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; - DRM_INFO("PCI error: slot reset callback!!\n"); + 
dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->reset_list, &device_list); - /* wait for asic to come out of reset */ - msleep(500); + msleep(700); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6750,26 +6784,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - - adev->no_hw_access = true; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->no_hw_access = false; - if (r) - goto out; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + INIT_LIST_HEAD(&device_list); - r = amdgpu_do_asic_reset(&device_list, &reset_context); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + reset_context.hive = hive; + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = true; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else { + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + list_add_tail(&adev->reset_list, &device_list); + } + r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); out: if (!r) { if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(adev->pdev); - - DRM_INFO("PCIe error recovery succeeded\n"); + dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { - DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); + if (tmp_adev) { + list_for_each_entry(tmp_adev, &device_list, reset_list) + amdgpu_device_unset_mp1_state(tmp_adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + } + } + + if (hive) { + 
mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -6786,26 +6834,36 @@ void amdgpu_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; - + struct list_head device_list; + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; - DRM_INFO("PCI error: resume callback!!\n"); + dev_info(adev->dev, "PCI error: resume callback!!\n"); /* Only continue execution for the case of pci_channel_io_frozen */ if (adev->pci_channel_state != pci_channel_io_frozen) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = false; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else + list_add_tail(&adev->reset_list, &device_list); - drm_sched_start(&ring->sched, 0); - } + amdgpu_device_sched_resume(&device_list, NULL, NULL); + amdgpu_device_gpu_resume(adev, &device_list, false); + adev->pcie_reset_ctx.occurs_dpc = false; - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 659eab9b90bec..c457be3a3c56f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,6 +584,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * Enable triggering of GPU reset only if specified * by module parameter. 
*/ + if (adev->pcie_reset_ctx.in_link_reset) + return AMD_RESET_METHOD_LINK; if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) return AMD_RESET_METHOD_MODE2; else if (!(adev->flags & AMD_IS_APU)) @@ -640,6 +642,9 @@ asic_reset: case AMD_RESET_METHOD_MODE2: dev_info(adev->dev, "MODE2 reset\n"); return amdgpu_dpm_mode2_reset(adev); + case AMD_RESET_METHOD_LINK: + dev_info(adev->dev, "Link reset\n"); + return amdgpu_device_link_reset(adev); default: dev_info(adev->dev, "MODE1 reset\n"); return amdgpu_device_mode1_reset(adev); -- GitLab From 969fd18c8d2491d9599176a9cca7e386f823144e Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Tue, 31 Dec 2024 12:53:13 +0800 Subject: [PATCH 025/899] drm/amdgpu/vcn: during dpc recovery will corrupt VCPU buffer err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to restore fw data and clear buffer in amdgpu_vcn_resume() Signed-off-by: Ce Sun Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1991dd3d1056a..c8885c3d54b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -353,9 +353,9 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub will corrupt VCPU buffer, so we need to + /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr) + if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) return 0; return amdgpu_vcn_save_vcpu_bo_inst(adev, i); -- GitLab From 43f668edae4a536f41c11d675ded958d9862a9a4 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Fri, 21 Mar 2025 17:00:09 +0530 Subject: [PATCH 026/899] drm/amd/display: add proper error message for vblank init v1 - DRM_ERROR 
to dev_err (Mario) Update message to identify the vblank initialization fail case Signed-off-by: Saleemkhan Jamadar Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9fed4471405f8..0e0e9039e0115 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2241,8 +2241,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size; if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { - DRM_ERROR( - "amdgpu: failed to initialize sw for display support.\n"); + dev_err(adev->dev, + "amdgpu: failed to initialize vblank sw for display support.\n"); goto error; } -- GitLab From cc9428d5336aec746ffeb3d13ee2cf49756b157d Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Fri, 21 Mar 2025 17:00:09 +0530 Subject: [PATCH 027/899] drm/amd/display: add proper error message for vblank init v1 - DRM_ERROR to drm_err (Mario) Update message to identify the vblank initialization fail case Signed-off-by: Saleemkhan Jamadar Reviewed-by: Rodrigo Siqueira Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0e0e9039e0115..8df93d23b723d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2241,7 +2241,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size; if (drm_vblank_init(adev_to_drm(adev),
adev->dm.display_indexes_num)) { - dev_err(adev->dev, + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize vblank sw for display support.\n"); goto error; } -- GitLab From 62e0b8f76651f5a85f8524fec73027a8f6407128 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sat, 22 Mar 2025 14:37:42 -0400 Subject: [PATCH 028/899] drm/amdgpu: use gmc_v7_0_is_idle() since it is available under GMC7 gmc_v7_0_is_idle() does exactly what we need, so use it. v2: fix parameter (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 83e39f16044a1..a8d5795084fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1157,17 +1157,10 @@ static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block) static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v7_0_is_idle(ip_block)) return 0; udelay(1); } -- GitLab From 60c53fe7bc13fb1ecf1d886c073a973371cb3c4a Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sat, 22 Mar 2025 14:37:43 -0400 Subject: [PATCH 029/899] drm/amdgpu: use cik_sdma_is_idle() in CIK SDMA cik_sdma_is_idle() does exactly what we need, so use it. 
V2: fix parameter (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 508cea9659832..81a1f7fb6d78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1040,14 +1040,10 @@ static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block) static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | - SRBM_STATUS2__SDMA1_BUSY_MASK); - - if (!tmp) + if (cik_sdma_is_idle(ip_block)) return 0; udelay(1); } -- GitLab From e319f9ec360008e5f0da5e8f15f2739d5c6ad93a Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sat, 22 Mar 2025 14:37:44 -0400 Subject: [PATCH 030/899] drm/amdgpu: small cleanup to CIK SDMA Tidy cik_sdma_hw_init() by returning directly cik_sdma_start()'s result. Keep amdgpu_cik_gpu_check_soft_reset() early declaration with others. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 81a1f7fb6d78c..9e8715b4739da 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -56,6 +56,8 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); +u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); @@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin"); -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); - - static void cik_sdma_free_microcode(struct amdgpu_device *adev) { int i; @@ -993,14 +992,9 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block) static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block) { - int r; struct amdgpu_device *adev = ip_block->adev; - r = cik_sdma_start(adev); - if (r) - return r; - - return r; + return cik_sdma_start(adev); } static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block) -- GitLab From 1be0ae9e12b4e20210e7c9a6544cfde78e5d1706 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sat, 22 Mar 2025 14:37:45 -0400 Subject: [PATCH 031/899] drm/amdgpu: move X_GB_ADDR_CONFIG_GOLDEN in GFX7 [BONAIRE|HAWAII]_GB_ADDR_CONFIG_GOLDEN are only used by GFX7. So keep them where they are needed. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cikd.h | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 2792883659408..8aca4f2734f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -60,9 +60,6 @@ #define AUD5_REGISTER_OFFSET (0x179d - 0x1780) #define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) -#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 -#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8181bd0e4f189..6eb48ebd3f4ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -55,6 +55,9 @@ #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 +#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 +#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 + static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); -- GitLab From 9aadb02fa2ecf9cc38534c6d203f61126c5a66d7 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sat, 22 Mar 2025 14:39:00 -0400 Subject: [PATCH 032/899] drm/radeon: fix MAX_POWER_SHIFT value While I don't think it is being used anywhere, if it were used, it would be wrong. We can base this assumption on MAX_POWER_MASK, where the shift is by 16 bits. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/sid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 65a911ddd509d..f9267b026f8de 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1109,7 +1109,7 @@ #define MIN_POWER_SHIFT 0 #define MAX_POWER(x) ((x) << 16) #define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 +#define MAX_POWER_SHIFT 16 #define SQ_POWER_THROTTLE2 0x8e5c #define MAX_POWER_DELTA(x) ((x) << 0) #define MAX_POWER_DELTA_MASK (0x3fff << 0) -- GitLab From 8e46cabf8ecef33b9e4e0977931e94c217ec6e87 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:43 -0400 Subject: [PATCH 033/899] drm/amdgpu: move GFX6 defines into gfx_v6_0.c Send a few GFX6 defines where it's used in GFX6. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/si_enums.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 13fbee46417af..fedf885955978 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -53,6 +53,9 @@ #define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 #define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 +#define GFX6_NUM_GFX_RINGS 1 +#define GFX6_NUM_COMPUTE_RINGS 2 + static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index d656ef1fa6e19..b44a32bacd5c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -148,8 +148,6 @@ #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) #define WAIT_REG_MEM_ENGINE(x) ((x) 
<< 8) -#define GFX6_NUM_GFX_RINGS 1 -#define GFX6_NUM_COMPUTE_RINGS 2 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -- GitLab From de81b86e965e778533d9e7db6f4fe6451fb37974 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:44 -0400 Subject: [PATCH 034/899] drm/amdgpu: wire up defines, shifts and masks through SI code To be able to remove as much duplicated defines, the different files containing definitions, shifts and masks must be properly included. Once done, the code will be migrated where needed to shifts and masks and proper defines, before removing useless defines in the end. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 18 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/si_dma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 + drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 20 ++++++++++++++++++-- drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c | 6 ++++++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2247f6a94858a..448f246f45374 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -35,6 +35,7 @@ #include "amdgpu_vce.h" #include "atom.h" #include "amd_pcie.h" + #include "si_dpm.h" #include "sid.h" #include "si_ih.h" @@ -44,17 +45,30 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "amdgpu_vkms.h" + +#include "uvd/uvd_4_0_d.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" -#include "uvd/uvd_4_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" +#include "si_enums.h" #include "amdgpu_dm.h" +#include "amdgpu_vkms.h" static 
const u32 tahiti_golden_registers[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index e2089c8da71b5..552efcc7764fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -27,6 +27,8 @@ #include "si.h" #include "sid.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { DMA0_REGISTER_OFFSET, diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5c38e1fb1dca7..53b2c3f039a8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -27,6 +27,7 @@ #include "amdgpu_ih.h" #include "sid.h" #include "si_ih.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 1c25f3023e935..c5db5cb8c44fa 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -30,16 +30,32 @@ #include "amdgpu_atombios.h" #include "amdgpu_dpm_internal.h" #include "amd_pcie.h" -#include "sid.h" +#include "atom.h" +#include "gfx_v6_0.h" #include "r600_dpm.h" +#include "sid.h" #include "si_dpm.h" -#include "atom.h" #include "../include/pptable.h" #include #include #include #include +#include "bif/bif_3_0_d.h" +#include "bif/bif_3_0_sh_mask.h" + +#include "dce/dce_6_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + +#include"gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b #define MC_CG_ARB_FREQ_F2 0x0c diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 8f994ffa9cd11..c712899c44cac 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -30,6 
+30,12 @@ #include "amdgpu_ucode.h" #include "sislands_smc.h" +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + static int si_set_smc_sram_address(struct amdgpu_device *adev, u32 smc_address, u32 limit) { -- GitLab From 193e08801525a4100373c8a432ead3193904d2d5 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:45 -0400 Subject: [PATCH 035/899] drm/amdgpu: use proper defines, shifts and masks in DCE6 code By replacing VGA_VSTATUS_CNTL by VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK, we also need to fix its usage in GMC6. Note: VGA_VSTATUS_CNTL's binary value was inverted in dce_6_0_sh_mask.h, so we need to invert its value where it was used. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 18 ++++++++--------- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/si_enums.h | 5 ----- drivers/gpu/drm/amd/amdgpu/sid.h | 20 ------------------- .../include/asic_reg/dce/dce_6_0_sh_mask.h | 2 ++ 5 files changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 255c709593433..98d6ffcfefe3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -412,7 +412,7 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, { if (!render) WREG32(mmVGA_RENDER_CONTROL, - RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL); + RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK); } static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) @@ -2108,7 +2108,7 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc, if (mode->flags & DRM_MODE_FLAG_INTERLACE) WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, - INTERLEAVE_EN); + DATA_FORMAT__INTERLEAVE_EN_MASK); else WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0); } @@ -2162,7 +2162,7 @@ static void 
dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | - ICON_DEGAMMA_MODE(0) | + (0 << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | @@ -2986,12 +2986,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask &= ~VBLANK_INT_MASK; + interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; case AMDGPU_IRQ_STATE_ENABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask |= VBLANK_INT_MASK; + interrupt_mask |= INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; default: @@ -3021,12 +3021,12 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl &= ~DC_HPDx_INT_EN; + dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl |= DC_HPDx_INT_EN; + dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); break; default: @@ -3096,7 +3096,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) - WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK); + WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], 
VBLANK_STATUS__VBLANK_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3107,7 +3107,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, break; case 1: /* vline */ if (disp_int & interrupt_status_offsets[crtc].vline) - WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK); + WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a992e79d9581b..b4567d619d098 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) /* disable VGA render */ tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp &= ~VGA_VSTATUS_CNTL; + tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK; WREG32(mmVGA_RENDER_CONTROL, tmp); } /* Update configuration */ diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index b44a32bacd5c0..f6804c9b7a273 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,23 +23,18 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_INT_MASK (1 << 0) -#define DC_HPDx_INT_EN (1 << 16) #define VBLANK_ACK (1 << 4) #define VLINE_ACK (1 << 4) #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define VGA_VSTATUS_CNTL 0xFFFCFFFF #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define INTERLEAVE_EN (1 << 0) #define LATENCY_WATERMARK_MASK(x) ((x) << 16) #define DC_LB_MEMORY_CONFIG(x) ((x) << 20) -#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) #define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) #define GRPH_ENDIAN_NONE 0 diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cbf232f5235b7..00eb40d4c1a2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -787,26 +787,6 
@@ # define LATENCY_LOW_WATERMARK(x) ((x) << 0) # define LATENCY_HIGH_WATERMARK(x) ((x) << 16) -/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */ -#define VLINE_STATUS 0x1AEE -# define VLINE_OCCURRED (1 << 0) -# define VLINE_ACK (1 << 4) -# define VLINE_STAT (1 << 12) -# define VLINE_INTERRUPT (1 << 16) -# define VLINE_INTERRUPT_TYPE (1 << 17) -/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */ -#define VBLANK_STATUS 0x1AEF -# define VBLANK_OCCURRED (1 << 0) -# define VBLANK_ACK (1 << 4) -# define VBLANK_STAT (1 << 12) -# define VBLANK_INTERRUPT (1 << 16) -# define VBLANK_INTERRUPT_TYPE (1 << 17) - -/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */ -#define INT_MASK 0x1AD0 -# define VBLANK_INT_MASK (1 << 0) -# define VLINE_INT_MASK (1 << 4) - #define DISP_INTERRUPT_STATUS 0x183D # define LB_D1_VLINE_INTERRUPT (1 << 2) # define LB_D1_VBLANK_INTERRUPT (1 << 3) diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h index bd8085ec54ed5..2d6a598a6c25c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h @@ -5242,6 +5242,8 @@ #define DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT 0x0000000c #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE_MASK 0x00000003L #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT 0x00000000 +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE_MASK 0x00000300L +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT 0x00000008 #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE_MASK 0x00000030L #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT 0x00000004 #define DENORM_CONTROL__DENORM_MODE_MASK 0x00000007L -- GitLab From cbd8207e234b6920cb4463bbf8cd547dcb9ff7fc Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:46 -0400 Subject: [PATCH 036/899] drm/amdgpu: remove PACKET3 duplicated defines from si_enums.h PACKET3 is already in sid.h, as it is done under cikd.h for CIK 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_enums.h | 123 -------------------------- 1 file changed, 123 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index f6804c9b7a273..7c4dc9c5373ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -146,127 +146,4 @@ #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ - (((op) & 0xFF) << 8) | \ - ((n) & 0x3FFF) << 16) -#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) -#define PACKET3_NOP 0x10 -#define PACKET3_SET_BASE 0x11 -#define PACKET3_BASE_INDEX(x) ((x) << 0) -#define PACKET3_CLEAR_STATE 0x12 -#define PACKET3_INDEX_BUFFER_SIZE 0x13 -#define PACKET3_DISPATCH_DIRECT 0x15 -#define PACKET3_DISPATCH_INDIRECT 0x16 -#define PACKET3_ALLOC_GDS 0x1B -#define PACKET3_WRITE_GDS_RAM 0x1C -#define PACKET3_ATOMIC_GDS 0x1D -#define PACKET3_ATOMIC 0x1E -#define PACKET3_OCCLUSION_QUERY 0x1F -#define PACKET3_SET_PREDICATION 0x20 -#define PACKET3_REG_RMW 0x21 -#define PACKET3_COND_EXEC 0x22 -#define PACKET3_PRED_EXEC 0x23 -#define PACKET3_DRAW_INDIRECT 0x24 -#define PACKET3_DRAW_INDEX_INDIRECT 0x25 -#define PACKET3_INDEX_BASE 0x26 -#define PACKET3_DRAW_INDEX_2 0x27 -#define PACKET3_CONTEXT_CONTROL 0x28 -#define PACKET3_INDEX_TYPE 0x2A -#define PACKET3_DRAW_INDIRECT_MULTI 0x2C -#define PACKET3_DRAW_INDEX_AUTO 0x2D -#define PACKET3_DRAW_INDEX_IMMD 0x2E -#define PACKET3_NUM_INSTANCES 0x2F -#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 -#define PACKET3_INDIRECT_BUFFER_CONST 0x31 -#define PACKET3_INDIRECT_BUFFER 0x3F -#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 -#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 -#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 -#define PACKET3_WRITE_DATA 0x37 -#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 -#define PACKET3_MEM_SEMAPHORE 0x39 -#define PACKET3_MPEG_INDEX 
0x3A -#define PACKET3_COPY_DW 0x3B -#define PACKET3_WAIT_REG_MEM 0x3C -#define PACKET3_MEM_WRITE 0x3D -#define PACKET3_COPY_DATA 0x40 -#define PACKET3_CP_DMA 0x41 -# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) -# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) -# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) -# define PACKET3_CP_DMA_CP_SYNC (1 << 31) -# define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) -# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) -# define PACKET3_CP_DMA_CMD_SAS (1 << 26) -# define PACKET3_CP_DMA_CMD_DAS (1 << 27) -# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) -# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) -# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_PFP_SYNC_ME 0x42 -#define PACKET3_SURFACE_SYNC 0x43 -# define PACKET3_DEST_BASE_0_ENA (1 << 0) -# define PACKET3_DEST_BASE_1_ENA (1 << 1) -# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) -# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) -# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) -# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) -# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) -# define PACKET3_CB5_DEST_BASE_ENA (1 << 11) -# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) -# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) -# define PACKET3_DB_DEST_BASE_ENA (1 << 14) -# define PACKET3_DEST_BASE_2_ENA (1 << 19) -# define PACKET3_DEST_BASE_3_ENA (1 << 21) -# define PACKET3_TCL1_ACTION_ENA (1 << 22) -# define PACKET3_TC_ACTION_ENA (1 << 23) -# define PACKET3_CB_ACTION_ENA (1 << 25) -# define PACKET3_DB_ACTION_ENA (1 << 26) -# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) -# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) -#define PACKET3_ME_INITIALIZE 0x44 -#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -#define PACKET3_COND_WRITE 0x45 -#define PACKET3_EVENT_WRITE 0x46 -#define PACKET3_EVENT_WRITE_EOP 0x47 -#define PACKET3_EVENT_WRITE_EOS 0x48 -#define PACKET3_PREAMBLE_CNTL 0x4A -# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) -# define 
PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) -#define PACKET3_ONE_REG_WRITE 0x57 -#define PACKET3_LOAD_CONFIG_REG 0x5F -#define PACKET3_LOAD_CONTEXT_REG 0x60 -#define PACKET3_LOAD_SH_REG 0x61 -#define PACKET3_SET_CONFIG_REG 0x68 -#define PACKET3_SET_CONFIG_REG_START 0x00002000 -#define PACKET3_SET_CONFIG_REG_END 0x00002c00 -#define PACKET3_SET_CONTEXT_REG 0x69 -#define PACKET3_SET_CONTEXT_REG_START 0x000a000 -#define PACKET3_SET_CONTEXT_REG_END 0x000a400 -#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 -#define PACKET3_SET_RESOURCE_INDIRECT 0x74 -#define PACKET3_SET_SH_REG 0x76 -#define PACKET3_SET_SH_REG_START 0x00002c00 -#define PACKET3_SET_SH_REG_END 0x00003000 -#define PACKET3_SET_SH_REG_OFFSET 0x77 -#define PACKET3_ME_WRITE 0x7A -#define PACKET3_SCRATCH_RAM_WRITE 0x7D -#define PACKET3_SCRATCH_RAM_READ 0x7E -#define PACKET3_CE_WRITE 0x7F -#define PACKET3_LOAD_CONST_RAM 0x80 -#define PACKET3_WRITE_CONST_RAM 0x81 -#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82 -#define PACKET3_DUMP_CONST_RAM 0x83 -#define PACKET3_INCREMENT_CE_COUNTER 0x84 -#define PACKET3_INCREMENT_DE_COUNTER 0x85 -#define PACKET3_WAIT_ON_CE_COUNTER 0x86 -#define PACKET3_WAIT_ON_DE_COUNTER 0x87 -#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 -#define PACKET3_SET_CE_DE_COUNTERS 0x89 -#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A -#define PACKET3_SWITCH_BUFFER 0x8B -#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) -#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) -#define PACKET3_SEM_SEL_WAIT (0x7 << 29) - #endif -- GitLab From c82d915fe1397392fa64daff8fd7f5c80c29bed1 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:47 -0400 Subject: [PATCH 037/899] drm/amdgpu: move si_ih.c away from sid.h defines They are properly defined under oss_1_0_d.h Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_ih.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c 
index 53b2c3f039a8a..1df00f8a2406a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -214,7 +214,7 @@ static int si_ih_resume(struct amdgpu_ip_block *ip_block) static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) return false; @@ -240,23 +240,23 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; if (srbm_soft_reset) { - tmp = RREG32(SRBM_SOFT_RESET); + tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); tmp &= ~srbm_soft_reset; - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); } -- GitLab From 76eb396db301c0e770a872a21a8a8e050ba36ae3 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:48 -0400 Subject: [PATCH 038/899] drm/amdgpu: use GRPH_SECONDARY_SURFACE_ADDRESS_MASK with GRPH_SECONDARY_SURFACE_ADDRESS in DCE6 It seems a copy-paste error: since we are working with mmGRPH_SECONDARY_SURFACE_ADDRESS, GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK should be used. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 98d6ffcfefe3d..2492f793c114d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2040,7 +2040,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); + (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); -- GitLab From 6168cb7a313694567872d0b16e84c0b8db34fa0c Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:49 -0400 Subject: [PATCH 039/899] drm/amdgpu: move DCE6 away from sid.h and si_enums.h defines This cleans up DCE6. 
I added some minor tweaks taken from CIK to exit early v2: minor fixes (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 156 +++++++-------- drivers/gpu/drm/amd/amdgpu/si_enums.h | 14 -- drivers/gpu/drm/amd/amdgpu/sid.h | 268 ++++++++++---------------- 3 files changed, 176 insertions(+), 262 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 2492f793c114d..cbedf91b68bf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1011,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* select wm A */ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); tmp = arb_control3; - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(1); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT))); /* select wm B */ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(2); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | @@ -1089,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, } WREG32(mmDC_LB_MEMORY_SPLIT + 
amdgpu_crtc->crtc_offset, - DC_LB_MEMORY_CONFIG(tmp)); + (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT)); WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT)); @@ -1306,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 offset; struct drm_connector *connector; struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; @@ -1327,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; + if (!dig || !dig->afmt || !dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { @@ -1348,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) return; for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; + u32 value = 0; u8 stereo_freqs = 0; int max_channels = -1; int j; @@ -1358,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) if (sad->format == eld_reg_to_type[i][1]) { if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); + value = (sad->channels << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) | + (sad->byte2 << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) | + (sad->freq << 
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT); max_channels = sad->channels; } @@ -1374,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } } - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + value |= (stereo_freqs << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT); + + WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value); } kfree(sads); - } static void dce_v6_0_audio_enable(struct amdgpu_device *adev, @@ -1886,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_bo *abo; uint64_t fb_location, tiling_flags; uint32_t fb_format, fb_pitch_pixels, pipe_config; - u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE); + u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -1926,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, switch (target_fb->format->format) { case DRM_FORMAT_C8: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) | - GRPH_FORMAT(GRPH_FORMAT_INDEXED)); + fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); break; case DRM_FORMAT_XRGB4444: case DRM_FORMAT_ARGB4444: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB4444)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB1555)); + fb_format = 
((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_BGRX5551: case DRM_FORMAT_BGRA5551: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA5551)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_RGB565: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB565)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB2101010)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to 
bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_BGRX1010102: case DRM_FORMAT_BGRA1010102: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA1010102)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); #ifdef __BIG_ENDIAN - fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; default: @@ -2013,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - fb_format |= GRPH_NUM_BANKS(num_banks); - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1); - fb_format |= GRPH_TILE_SPLIT(tile_split); - fb_format |= GRPH_BANK_WIDTH(bankw); - fb_format |= GRPH_BANK_HEIGHT(bankh); - fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect); + fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT); + fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); + fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT); + fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT); + fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT); + fb_format |= (mtaspect << 
GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT); } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1); + fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); } pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - fb_format |= GRPH_PIPE_CONFIG(pipe_config); + fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT); dce_v6_0_vga_enable(crtc, false); @@ -2115,7 +2121,6 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc, static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -2125,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | - (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); + ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | + (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK); WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK); WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | - (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); + ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | + (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); @@ -2160,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << 
DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); + ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | - (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); + ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | + (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | - (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); + ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | + (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | - (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); + ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | + (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); /* XXX match this to the depth of the crtc fmt block, move to modeset? 
*/ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0); @@ -2267,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc) WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - - } static void dce_v6_0_show_cursor(struct drm_crtc *crtc) @@ -2285,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc) CUR_CONTROL__CURSOR_EN_MASK | (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - } static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, @@ -2596,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -2669,7 +2670,7 @@ static void dce_v6_0_panic_flush(struct drm_plane *plane) /* Disable DC tiling */ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_ARRAY_MODE(0x7); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); } @@ -2745,7 +2746,6 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - bool ret; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { @@ -2789,8 +2789,7 @@ static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - ret = amdgpu_atombios_get_connector_info_from_object_table(adev); - if (ret) + if (amdgpu_atombios_get_connector_info_from_object_table(adev)) amdgpu_display_print_display_setup(adev_to_drm(adev)); else return -EINVAL; @@ -3172,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev, spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); 
works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " "AMDGPU_FLIP_SUBMITTED(%d)\n", amdgpu_crtc->pflip_status, @@ -3249,12 +3248,10 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .set_powergating_state = dce_v6_0_set_powergating_state, }; -static void -dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3274,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) { - struct amdgpu_device *adev = drm_to_adev(encoder->dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); @@ -3314,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -3325,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index 7c4dc9c5373ef..aee3036be30ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -33,10 +33,6 @@ #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define LATENCY_WATERMARK_MASK(x) 
((x) << 16) -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) - -#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) #define GRPH_ENDIAN_NONE 0 #define GRPH_ENDIAN_8IN16 1 #define GRPH_ENDIAN_8IN32 2 @@ -62,12 +58,10 @@ #define GRPH_ALPHA_SEL_G 2 #define GRPH_ALPHA_SEL_B 3 -#define GRPH_DEPTH(x) (((x) & 0x3) << 0) #define GRPH_DEPTH_8BPP 0 #define GRPH_DEPTH_16BPP 1 #define GRPH_DEPTH_32BPP 2 -#define GRPH_FORMAT(x) (((x) & 0x7) << 8) #define GRPH_FORMAT_INDEXED 0 #define GRPH_FORMAT_ARGB1555 0 #define GRPH_FORMAT_ARGB565 1 @@ -84,18 +78,10 @@ #define GRPH_FORMAT_RGB111110 6 #define GRPH_FORMAT_BGR101111 7 -#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) #define GRPH_ARRAY_LINEAR_GENERAL 0 #define GRPH_ARRAY_LINEAR_ALIGNED 1 #define GRPH_ARRAY_1D_TILED_THIN1 2 #define GRPH_ARRAY_2D_TILED_THIN1 4 -#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24) #define CURSOR_EN (1 << 0) #define CURSOR_MODE(x) (((x) & 0x3) << 8) diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 00eb40d4c1a2f..86b6e830cd71e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -772,17 +772,12 @@ #define PORT_CONNECTIVITY_MASK (3 << 30) #define PORT_CONNECTIVITY_SHIFT 30 -#define DC_LB_MEMORY_SPLIT 0x1AC3 -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) - #define PRIORITY_A_CNT 0x1AC6 #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) #define PRIORITY_B_CNT 0x1AC7 -#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32 -# define LATENCY_WATERMARK_MASK(x) ((x) << 16) #define DPG_PIPE_LATENCY_CONTROL 0x1B33 # define LATENCY_LOW_WATERMARK(x) ((x) << 0) # define LATENCY_HIGH_WATERMARK(x) ((x) << 16) @@ -1794,109 +1789,6 @@ 
#define FMT_50FRC_SEL(x) ((x) << 28) #define FMT_75FRC_SEL(x) ((x) << 30) -#define EVERGREEN_DC_LUT_CONTROL 0x1a80 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86 -#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c -#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79 -#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e -#define EVERGREEN_DC_LUT_RW_MODE 0x1a78 - -#define EVERGREEN_GRPH_ENABLE 0x1a00 -#define EVERGREEN_GRPH_CONTROL 0x1a01 -#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define EVERGREEN_GRPH_DEPTH_8BPP 0 -#define EVERGREEN_GRPH_DEPTH_16BPP 1 -#define EVERGREEN_GRPH_DEPTH_32BPP 2 -#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define EVERGREEN_ADDR_SURF_2_BANK 0 -#define EVERGREEN_ADDR_SURF_4_BANK 1 -#define EVERGREEN_ADDR_SURF_8_BANK 2 -#define EVERGREEN_ADDR_SURF_16_BANK 3 -#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4) -#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3 -#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8) - -#define EVERGREEN_GRPH_FORMAT_INDEXED 0 -#define EVERGREEN_GRPH_FORMAT_ARGB1555 0 -#define EVERGREEN_GRPH_FORMAT_ARGB565 1 -#define EVERGREEN_GRPH_FORMAT_ARGB4444 2 -#define EVERGREEN_GRPH_FORMAT_AI88 3 -#define EVERGREEN_GRPH_FORMAT_MONO16 4 -#define EVERGREEN_GRPH_FORMAT_BGRA5551 5 - -/* 32 BPP */ -#define EVERGREEN_GRPH_FORMAT_ARGB8888 0 -#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1 -#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2 -#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3 -#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4 -#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5 -#define EVERGREEN_GRPH_FORMAT_RGB111110 6 
-#define EVERGREEN_GRPH_FORMAT_BGR101111 7 -#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3 -#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6 -#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 -#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0 -#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1 -#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2 -#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 - -#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03 -#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -# define EVERGREEN_GRPH_ENDIAN_NONE 0 -# define EVERGREEN_GRPH_ENDIAN_8IN16 1 -# define EVERGREEN_GRPH_ENDIAN_8IN32 2 -# define EVERGREEN_GRPH_ENDIAN_8IN64 3 -#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -# define EVERGREEN_GRPH_RED_SEL_R 0 -# define EVERGREEN_GRPH_RED_SEL_G 1 -# define EVERGREEN_GRPH_RED_SEL_B 2 -# define EVERGREEN_GRPH_RED_SEL_A 3 -#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -# define EVERGREEN_GRPH_GREEN_SEL_G 0 -# define EVERGREEN_GRPH_GREEN_SEL_B 1 -# 
define EVERGREEN_GRPH_GREEN_SEL_A 2 -# define EVERGREEN_GRPH_GREEN_SEL_R 3 -#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -# define EVERGREEN_GRPH_BLUE_SEL_B 0 -# define EVERGREEN_GRPH_BLUE_SEL_A 1 -# define EVERGREEN_GRPH_BLUE_SEL_R 2 -# define EVERGREEN_GRPH_BLUE_SEL_G 3 -#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -# define EVERGREEN_GRPH_ALPHA_SEL_A 0 -# define EVERGREEN_GRPH_ALPHA_SEL_R 1 -# define EVERGREEN_GRPH_ALPHA_SEL_G 2 -# define EVERGREEN_GRPH_ALPHA_SEL_B 3 - #define EVERGREEN_D3VGA_CONTROL 0xf8 #define EVERGREEN_D4VGA_CONTROL 0xf9 #define EVERGREEN_D5VGA_CONTROL 0xfa @@ -1956,65 +1848,6 @@ # define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) # define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - -#define NI_INPUT_CSC_CONTROL 0x1a35 -# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_CSC_BYPASS 0 -# define NI_INPUT_CSC_PROG_COEFF 1 -# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2 -# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4) - -#define NI_OUTPUT_CSC_CONTROL 0x1a3c -# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0) -# define NI_OUTPUT_CSC_BYPASS 0 -# define NI_OUTPUT_CSC_TV_RGB 1 -# define NI_OUTPUT_CSC_YCBCR_601 2 -# define NI_OUTPUT_CSC_YCBCR_709 3 -# define NI_OUTPUT_CSC_PROG_COEFF 4 -# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5 -# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4) - -#define NI_DEGAMMA_CONTROL 0x1a58 -# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_DEGAMMA_BYPASS 0 -# define NI_DEGAMMA_SRGB_24 1 -# define NI_DEGAMMA_XVYCC_222 2 -# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4) -# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) -# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12) - -#define NI_GAMUT_REMAP_CONTROL 0x1a59 -# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0) -# define NI_GAMUT_REMAP_BYPASS 0 -# define NI_GAMUT_REMAP_PROG_COEFF 1 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3 -# 
define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4) - -#define NI_REGAMMA_CONTROL 0x1aa0 -# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0) -# define NI_REGAMMA_BYPASS 0 -# define NI_REGAMMA_SRGB_24 1 -# define NI_REGAMMA_XVYCC_222 2 -# define NI_REGAMMA_PROG_A 3 -# define NI_REGAMMA_PROG_B 4 -# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4) - - -#define NI_PRESCALE_GRPH_CONTROL 0x1a2d -# define NI_GRPH_PRESCALE_BYPASS (1 << 4) - -#define NI_PRESCALE_OVL_CONTROL 0x1a31 -# define NI_OVL_PRESCALE_BYPASS (1 << 4) - -#define NI_INPUT_GAMMA_CONTROL 0x1a10 -# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_GAMMA_USE_LUT 0 -# define NI_INPUT_GAMMA_BYPASS 1 -# define NI_INPUT_GAMMA_SRGB_24 2 -# define NI_INPUT_GAMMA_XVYCC_222 3 -# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4) - #define BLACKOUT_MODE_MASK 0x00000007 #define VGA_RENDER_CONTROL 0xC0 #define R_000300_VGA_RENDER_CONTROL 0xC0 @@ -2074,6 +1907,107 @@ #define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2 #define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1 +#define GRPH_DEPTH_8BPP 0 +#define GRPH_DEPTH_16BPP 1 +#define GRPH_DEPTH_32BPP 2 + +/* 8 BPP */ +#define GRPH_FORMAT_INDEXED 0 + +/* 16 BPP */ +#define GRPH_FORMAT_ARGB1555 0 +#define GRPH_FORMAT_ARGB565 1 +#define GRPH_FORMAT_ARGB4444 2 +#define GRPH_FORMAT_AI88 3 +#define GRPH_FORMAT_MONO16 4 +#define GRPH_FORMAT_BGRA5551 5 + +/* 32 BPP */ +#define GRPH_FORMAT_ARGB8888 0 +#define GRPH_FORMAT_ARGB2101010 1 +#define GRPH_FORMAT_32BPP_DIG 2 +#define GRPH_FORMAT_8B_ARGB2101010 3 +#define GRPH_FORMAT_BGRA1010102 4 +#define GRPH_FORMAT_8B_BGRA1010102 5 +#define GRPH_FORMAT_RGB111110 6 +#define GRPH_FORMAT_BGR101111 7 + +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) + +#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) +#define GRPH_RED_SEL_R 0 +#define GRPH_RED_SEL_G 1 +#define GRPH_RED_SEL_B 2 +#define GRPH_RED_SEL_A 3 + +#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) +#define GRPH_GREEN_SEL_G 0 +#define GRPH_GREEN_SEL_B 1 
+#define GRPH_GREEN_SEL_A 2 +#define GRPH_GREEN_SEL_R 3 + +#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) +#define GRPH_BLUE_SEL_B 0 +#define GRPH_BLUE_SEL_A 1 +#define GRPH_BLUE_SEL_R 2 +#define GRPH_BLUE_SEL_G 3 + +#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) +#define GRPH_ALPHA_SEL_A 0 +#define GRPH_ALPHA_SEL_R 1 +#define GRPH_ALPHA_SEL_G 2 +#define GRPH_ALPHA_SEL_B 3 + +/* CUR_CONTROL */ + #define CURSOR_MONO 0 + #define CURSOR_24_1 1 + #define CURSOR_24_8_PRE_MULT 2 + #define CURSOR_24_8_UNPRE_MULT 3 + #define CURSOR_URGENT_ALWAYS 0 + #define CURSOR_URGENT_1_8 1 + #define CURSOR_URGENT_1_4 2 + #define CURSOR_URGENT_3_8 3 + #define CURSOR_URGENT_1_2 4 + +/* INPUT_CSC_CONTROL */ +# define INPUT_CSC_BYPASS 0 +# define INPUT_CSC_PROG_COEFF 1 +# define INPUT_CSC_PROG_SHARED_MATRIXA 2 + +/* OUTPUT_CSC_CONTROL */ +# define OUTPUT_CSC_BYPASS 0 +# define OUTPUT_CSC_TV_RGB 1 +# define OUTPUT_CSC_YCBCR_601 2 +# define OUTPUT_CSC_YCBCR_709 3 +# define OUTPUT_CSC_PROG_COEFF 4 +# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5 + +/* DEGAMMA_CONTROL */ +# define DEGAMMA_BYPASS 0 +# define DEGAMMA_SRGB_24 1 +# define DEGAMMA_XVYCC_222 2 + +/* GAMUT_REMAP_CONTROL */ +# define GAMUT_REMAP_BYPASS 0 +# define GAMUT_REMAP_PROG_COEFF 1 +# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2 +# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3 + +/* REGAMMA_CONTROL */ +# define REGAMMA_BYPASS 0 +# define REGAMMA_SRGB_24 1 +# define REGAMMA_XVYCC_222 2 +# define REGAMMA_PROG_A 3 +# define REGAMMA_PROG_B 4 + + +/* INPUT_GAMMA_CONTROL */ +# define INPUT_GAMMA_USE_LUT 0 +# define INPUT_GAMMA_BYPASS 1 +# define INPUT_GAMMA_SRGB_24 2 +# define INPUT_GAMMA_XVYCC_222 3 + #define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000 #define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11 #define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 -- GitLab From 0ba7e47e8e3d1d6abad810d68b4b1a52c90a9242 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:50 -0400 Subject: [PATCH 040/899] drm/amdgpu: 
add missing DMA defines, shifts and masks They will be used later when switching away from sid.h/si_enums.h. v2: fix up whitespace (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- .../drm/amd/include/asic_reg/oss/oss_1_0_d.h | 21 ++++++++-- .../include/asic_reg/oss/oss_1_0_sh_mask.h | 41 +++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h index edc8a793a95de..cef0263596918 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h @@ -234,6 +234,24 @@ #define mmIH_RB_WPTR_ADDR_HI 0x0F84 #define mmIH_RB_WPTR_ADDR_LO 0x0F85 #define mmIH_STATUS 0x0F88 + +#define mmDMA_GFX_RB_CNTL 0x3400 +#define mmDMA_GFX_RB_BASE 0x3401 +#define mmDMA_GFX_RB_RPTR 0x3402 +#define mmDMA_GFX_RB_WPTR 0x3403 +#define mmDMA_GFX_RB_RPTR_ADDR_HI 0x3407 +#define mmDMA_GFX_RB_RPTR_ADDR_LO 0x3408 +#define mmDMA_GFX_IB_CNTL 0x3409 +#define mmDMA_GFX_IB_RPTR 0x340a +#define mmDMA_CNTL 0x340b +#define mmDMA_STATUS_REG 0x340D +#define mmDMA_TILING_CONFIG 0x342E +#define mmDMA_POWER_CNTL 0x342F +#define mmDMA_CLK_CTRL 0x3430 +#define mmDMA_PG 0x3435 +#define mmDMA_PGFSM_CONFIG 0x3436 +#define mmDMA_PGFSM_WRITE 0x3437 + #define mmSEM_MAILBOX 0x0F9B #define mmSEM_MAILBOX_CLIENTCONFIG 0x0F9A #define mmSEM_MAILBOX_CONTROL 0x0F9C @@ -269,7 +287,4 @@ #define mmVCE_CONFIG 0x0F94 #define mmXDMA_MSTR_MEM_OVERFLOW_CNTL 0x03F8 -/* from the old sid.h */ -#define mmDMA_TILING_CONFIG 0x342E - #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h index 1c540fe136cb2..9f7fc2428b69e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h @@ -823,6 +823,43 @@ #define LX3__RESERVED__SHIFT 0x00000000 #define RINGOSC_MASK__MASK_MASK 
0x0000ffffL #define RINGOSC_MASK__MASK__SHIFT 0x00000000 + +#define DMA_CNTL__TRAP_ENABLE_MASK 0x00000001L +#define DMA_CNTL__TRAP_ENABLE__SHIFT 0x00000000 +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE_MASK 0x00000002L +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE__SHIFT 0x00000001 +#define DMA_CNTL__SEM_WAIT_INT_ENABLE_MASK 0x00000004L +#define DMA_CNTL__SEM_WAIT_INT_ENABLE__SHIFT 0x00000002 +#define DMA_CNTL__DATA_SWAP_ENABLE_MASK 0x00000008L +#define DMA_CNTL__DATA_SWAP_ENABLE__SHIFT 0x00000003 +#define DMA_CNTL__FENCE_SWAP_ENABLE_MASK 0x00000010L +#define DMA_CNTL__FENCE_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK 0x10000000L +#define DMA_CNTL__CTXEMPTY_INT_ENABLE__SHIFT 0x0000001C +#define DMA_GFX_RB_CNTL__RB_ENABLE_MASK 0x00000001L +#define DMA_GFX_RB_CNTL__RB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_RB_CNTL__RB_SIZE__SHIFT 0x00000001 +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK 0x00000200L +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE__SHIFT 0x00000009 +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK 0x00001000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT 0x0000000C +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK 0x00002000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE__SHIFT 0x0000000D +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT 0x00000010 +#define DMA_GFX_IB_CNTL__IB_ENABLE_MASK 0x00000001L +#define DMA_GFX_IB_CNTL__IB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK 0x00000010L +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK 0x80000000L +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE__SHIFT 0x0000001F + +#define DMA_STATUS_REG__IDLE_MASK 0x00000001L +#define DMA_STATUS_REG__IDLE__SHIFT 0x00000000 +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK 0x00000100L +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE__SHIFT 0x00000008 +#define DMA_PG__PG_CNTL_ENABLE_MASK 0x00000001L +#define DMA_PG__PG_CNTL_ENABLE__SHIFT 0x00000000 + 
#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0_MASK 0x00000007L #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0__SHIFT 0x00000000 #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT1_MASK 0x00000038L @@ -1015,6 +1052,10 @@ #define SRBM_STATUS2__VCE_BUSY__SHIFT 0x00000007 #define SRBM_STATUS2__VCE_RQ_PENDING_MASK 0x00000008L #define SRBM_STATUS2__VCE_RQ_PENDING__SHIFT 0x00000003 +#define SRBM_STATUS2__DMA_BUSY_MASK 0x00000020L +#define SRBM_STATUS2__DMA_BUSY__SHIFT 0x00000005 +#define SRBM_STATUS2__DMA1_BUSY_MASK 0x00000040L +#define SRBM_STATUS2__DMA1_BUSY__SHIFT 0x00000006 #define SRBM_STATUS2__XDMA_BUSY_MASK 0x00000100L #define SRBM_STATUS2__XDMA_BUSY__SHIFT 0x00000008 #define SRBM_STATUS2__XSP_BUSY_MASK 0x00000010L -- GitLab From 535b6191904dd5a4f26c332ccc40c68f6d2b1233 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:51 -0400 Subject: [PATCH 041/899] drm/amdgpu: add missing GFX6 defines They will be used later when switching away from sid.h/si_enums.h. v2: fix whitespace (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h index c75aee25619e8..6f44345277af6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h @@ -1779,6 +1779,8 @@ #define mmRLC_TTOP_D 0x3105 #define mmRLC_CLEAR_STATE_RESTORE_BASE 0x30C8 #define mmRLC_PG_AO_CU_MASK 0x310B +#define mmSPI_STATIC_THREAD_MGMT_1 0x2438 +#define mmSPI_STATIC_THREAD_MGMT_2 0x2439 #define mmSPI_STATIC_THREAD_MGMT_3 0x243A #endif -- GitLab From 230a4b0528c0f2439df3f6cc5a6f053089a116b0 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:54 -0400 Subject: [PATCH 042/899] drm/amdgpu: make GFX6 easier to read Just fix the style and add a comment to improve readability. Signed-off-by:
Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index fedf885955978..9c6453b458b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1735,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); gfx_v6_0_config_init(adev); - WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | - (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); - WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + WREG32(mmCP_QUEUE_THRESHOLDS, + ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); sx_debug_1 = RREG32(mmSX_DEBUG_1); WREG32(mmSX_DEBUG_1, sx_debug_1); -- GitLab From 14f15aa054419ecd3a2578822958f87a84142bee Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:56 -0400 Subject: [PATCH 043/899] drm/amdgpu: move si_dma.c away from sid.h and si_enums.h Replace defines for the ones in oss_1_0_d.h and oss_1_0_sh_mask.h Taking the opportunity to add some comments taken from cik_sdma.c Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 4 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 108 ++++++++++++++++------------ drivers/gpu/drm/amd/amdgpu/sid.h | 36 ---------- 3 files changed, 63 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 448f246f45374..94a6ec162640e 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ 
b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1130,8 +1130,8 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGRBM_STATUS_SE1}, {mmSRBM_STATUS}, {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET}, {mmCP_STAT}, {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 552efcc7764fe..bfdaaca434ab6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -40,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); +/** + * si_dma_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return *ring->rptr_cpu_addr; } +/** + * si_dma_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; } static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) @@ -58,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -119,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); + rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); } } @@ -142,37 +156,37 @@ static int si_dma_start(struct amdgpu_device *adev) rb_bufsz = order_base_2(ring->ring_size / 4); rb_cntl = rb_bufsz << 1; #ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; #endif - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); - WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); rptr_addr = ring->rptr_gpu_addr; - WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); - WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; - WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); + WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; + ib_cntl = 
DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; #ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; + ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; #endif - WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); + WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); + dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); + dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; + WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); r = amdgpu_ring_test_helper(ring); if (r) @@ -547,9 +561,9 @@ static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS2); + u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) + if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) return false; return true; @@ -585,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + 
WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -601,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -647,11 +661,11 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data &= ~MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); - WREG32(DMA_CLK_CTRL + offset, 0x00000100); + WREG32(mmDMA_POWER_CNTL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); } } else { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -659,15 +673,15 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data |= MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); + WREG32(mmDMA_POWER_CNTL + offset, data); - orig = data = RREG32(DMA_CLK_CTRL + offset); + orig = data 
= RREG32(mmDMA_CLK_CTRL + offset); data = 0xff000000; if (data != orig) - WREG32(DMA_CLK_CTRL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, data); } } @@ -681,11 +695,11 @@ static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; - WREG32(DMA_PGFSM_WRITE, 0x00002000); - WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + WREG32(mmDMA_PGFSM_WRITE, 0x00002000); + WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); for (tmp = 0; tmp < 5; tmp++) - WREG32(DMA_PGFSM_WRITE, 0); + WREG32(mmDMA_PGFSM_WRITE, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 86b6e830cd71e..085b2b1cf120b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1559,44 +1559,11 @@ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ -#define DMA_RB_CNTL 0x3400 -# define DMA_RB_ENABLE (1 << 0) -# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ -# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) -# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define DMA_RB_BASE 0x3401 -#define DMA_RB_RPTR 0x3402 -#define DMA_RB_WPTR 0x3403 - -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 - -#define DMA_IB_CNTL 0x3409 -# define DMA_IB_ENABLE (1 << 0) -# define DMA_IB_SWAP_ENABLE (1 << 4) -# define CMD_VMID_FORCE (1 << 31) #define DMA_IB_RPTR 0x340a -#define DMA_CNTL 0x340b -# define TRAP_ENABLE (1 << 0) -# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) -# define SEM_WAIT_INT_ENABLE (1 << 2) -# define DATA_SWAP_ENABLE (1 << 3) -# define FENCE_SWAP_ENABLE (1 << 4) -# define CTXEMPTY_INT_ENABLE (1 << 28) -#define DMA_STATUS_REG 0x340d -# define DMA_IDLE (1 << 0) #define DMA_TILING_CONFIG 0x342e -#define DMA_POWER_CNTL 0x342f -# define MEM_POWER_OVERRIDE (1 << 8) -#define DMA_CLK_CTRL 0x3430 - #define 
DMA_PG 0x3435 # define PG_CNTL_ENABLE (1 << 0) -#define DMA_PGFSM_CONFIG 0x3436 -#define DMA_PGFSM_WRITE 0x3437 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -2035,10 +2002,7 @@ #define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 #define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define DMA_MODE 0x342f -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 #define DMA_BUSY_MASK 0x20 -#define DMA1_BUSY_MASK 0X40 #define SDMA_MAX_INSTANCE 2 #define PCIE_BUS_CLK 10000 -- GitLab From d35a412910906ffdcb46a33b20ccd4676b62fccd Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:58 -0400 Subject: [PATCH 044/899] drm/amdgpu: keep removing sid.h dependency from si_dma.c Move and rename DMA_SEM_INCOMPLETE_TIMER_CNTL and DMA_SEM_WAIT_FAIL_TIMER_CNTL in oss_1_0_d.h Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dma.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sid.h | 2 -- drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h | 2 ++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index bfdaaca434ab6..7f18e4875287c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -149,8 +149,8 @@ static int si_dma_start(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); @@ -477,7 +477,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->sdma.num_instances = 2; + adev->sdma.num_instances = 
SDMA_MAX_INSTANCE; si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 085b2b1cf120b..0ace1ede24a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1999,8 +1999,6 @@ #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 -#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define DMA_MODE 0x342f #define DMA_BUSY_MASK 0x20 #define SDMA_MAX_INSTANCE 2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h index cef0263596918..4dd386b98748a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h @@ -246,6 +246,8 @@ #define mmDMA_CNTL 0x340b #define mmDMA_STATUS_REG 0x340D #define mmDMA_TILING_CONFIG 0x342E +#define mmDMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 +#define mmDMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define mmDMA_POWER_CNTL 0x342F #define mmDMA_CLK_CTRL 0x3430 #define mmDMA_PG 0x3435 -- GitLab From b71b7cd91c642f282794b82cf2b8159899b9aa59 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:59 -0400 Subject: [PATCH 045/899] drm/amdgpu: cleanup DCE6 a bit more Use shifts already available in DCE6's defines, masks and shifts. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sid.h | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index cbedf91b68bf7..2d85ea7159de8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1998,8 +1998,8 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, case DRM_FORMAT_ABGR8888: fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); - fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | - GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 0ace1ede24a75..fc61c2ea002f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1902,25 +1902,21 @@ #define ES_AND_GS_AUTO 3 #define BUF_SWAP_32BIT (2 << 16) -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) #define GRPH_RED_SEL_R 0 #define GRPH_RED_SEL_G 1 #define GRPH_RED_SEL_B 2 #define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) #define GRPH_GREEN_SEL_G 0 #define GRPH_GREEN_SEL_B 1 #define GRPH_GREEN_SEL_A 2 #define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) #define GRPH_BLUE_SEL_B 0 #define GRPH_BLUE_SEL_A 1 #define GRPH_BLUE_SEL_R 2 #define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) #define GRPH_ALPHA_SEL_A 0 #define GRPH_ALPHA_SEL_R 1 #define GRPH_ALPHA_SEL_G 2 -- GitLab From 48b733d99b0d8db01fcf55d7ae31f69510fb1a4a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Feb 2025 
12:31:28 -0500 Subject: [PATCH 046/899] drm/amdgpu: add rebar parameter Add a new parameter to disable BAR resizing. Note that this only disables the driver from attempting to resize the BAR, The BIOS may have resized the BAR at boot. Some teams have found this useful in debugging P2P DMA issues on systems where the available MMIO space did not allow for all of the GPUs present to resize their BARs. Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9a9e5249c63f9..8316f93c1cce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -266,6 +266,7 @@ extern int amdgpu_umsch_mm_fwlog; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_rebar; extern int amdgpu_wbrf; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7525128f971f4..f4a01704effcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1680,6 +1680,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ if ((amdgpu_runtime_pm != 0) && adev->pdev->vendor == PCI_VENDOR_ID_ATI && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 26bf896f1444e..2ca04d6aaa822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -238,6 +238,7 @@ int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ 
DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1096,6 +1097,16 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ -- GitLab From c6ae8d587eeb2d285ee13d064fd73ad0568198c4 Mon Sep 17 00:00:00 2001 From: Andres Urian Florez Date: Mon, 24 Mar 2025 19:07:21 -0500 Subject: [PATCH 047/899] drm/amdgpu: Replace deprecated function strcpy() with strscpy() Instead of using the strcpy() deprecated function to populate the fw_name, use the strscpy() function Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy Signed-off-by: Andres Urian Florez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 68 ++++++++++++------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 68bce6a6d09d1..525e53c94f4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -253,16 +253,16 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { case CHIP_TAHITI: - strcpy(fw_name, "radeon/tahiti_smc.bin"); + strscpy(fw_name, "radeon/tahiti_smc.bin"); break; case CHIP_PITCAIRN: if ((adev->pdev->revision == 0x81) && 
((adev->pdev->device == 0x6810) || (adev->pdev->device == 0x6811))) { info->is_kicker = true; - strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); + strscpy(fw_name, "radeon/pitcairn_k_smc.bin"); } else { - strcpy(fw_name, "radeon/pitcairn_smc.bin"); + strscpy(fw_name, "radeon/pitcairn_smc.bin"); } break; case CHIP_VERDE: @@ -276,9 +276,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6823) || (adev->pdev->device == 0x682b)))) { info->is_kicker = true; - strcpy(fw_name, "radeon/verde_k_smc.bin"); + strscpy(fw_name, "radeon/verde_k_smc.bin"); } else { - strcpy(fw_name, "radeon/verde_smc.bin"); + strscpy(fw_name, "radeon/verde_smc.bin"); } break; case CHIP_OLAND: @@ -290,9 +290,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->revision == 0x83) && (adev->pdev->device == 0x6610))) { info->is_kicker = true; - strcpy(fw_name, "radeon/oland_k_smc.bin"); + strscpy(fw_name, "radeon/oland_k_smc.bin"); } else { - strcpy(fw_name, "radeon/oland_smc.bin"); + strscpy(fw_name, "radeon/oland_smc.bin"); } break; case CHIP_HAINAN: @@ -304,13 +304,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->device == 0x6665) || (adev->pdev->device == 0x6667)))) { info->is_kicker = true; - strcpy(fw_name, "radeon/hainan_k_smc.bin"); + strscpy(fw_name, "radeon/hainan_k_smc.bin"); } else if ((adev->pdev->revision == 0xc3) && (adev->pdev->device == 0x6665)) { info->is_kicker = true; - strcpy(fw_name, "radeon/banks_k_2_smc.bin"); + strscpy(fw_name, "radeon/banks_k_2_smc.bin"); } else { - strcpy(fw_name, "radeon/hainan_smc.bin"); + strscpy(fw_name, "radeon/hainan_smc.bin"); } break; case CHIP_BONAIRE: @@ -318,17 +318,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); + strscpy(fw_name, 
"amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/bonaire_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/hawaii_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -338,76 +338,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/topaz_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: - strcpy(fw_name, "amdgpu/fiji_smc.bin"); + strscpy(fw_name, "amdgpu/fiji_smc.bin"); break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k_smc.bin"); } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); 
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_smc.bin"); } break; case CHIP_VEGAM: - strcpy(fw_name, "amdgpu/vegam_smc.bin"); + strscpy(fw_name, "amdgpu/vegam_smc.bin"); break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc1) || (adev->pdev->revision == 0xc3))) - strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_acg_smc.bin"); else - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_smc.bin"); break; case CHIP_VEGA12: - strcpy(fw_name, "amdgpu/vega12_smc.bin"); + strscpy(fw_name, "amdgpu/vega12_smc.bin"); break; case CHIP_VEGA20: - strcpy(fw_name, "amdgpu/vega20_smc.bin"); + strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); -- GitLab From 3470f80bd36e9b3da2542a63c1b62114eecce01c Mon Sep 17 00:00:00 2001 From: Ananta Srikar Date: Mon, 24 Mar 2025 21:49:12 -0400 Subject: [PATCH 048/899] drm/amd/amdgpu: 
Fix typo Fixes a typo in the word "version" in an error message. Signed-off-by: Ananta Srikar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ef9538fbbf537..c34a00500e183 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -649,7 +649,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); return -EINVAL; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; -- GitLab From 160d3d39f61cb92a6cabceebc2d226ed3c5ab4c3 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:47:00 -0400 Subject: [PATCH 049/899] drm/amdgpu: continue cleaning up sid.h and si_enums.h Remove more duplicated defines and move some in sid.h for coherence with CIK. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_enums.h | 90 +-------------------------- drivers/gpu/drm/amd/amdgpu/sid.h | 16 ++++- 2 files changed, 14 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index aee3036be30ec..6da65778292bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,96 +23,15 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_ACK (1 << 4) -#define VLINE_ACK (1 << 4) - -#define CURSOR_WIDTH 64 -#define CURSOR_HEIGHT 64 - #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define GRPH_ENDIAN_NONE 0 -#define GRPH_ENDIAN_8IN16 1 -#define GRPH_ENDIAN_8IN32 2 -#define GRPH_ENDIAN_8IN64 3 -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -#define GRPH_RED_SEL_R 0 -#define GRPH_RED_SEL_G 1 -#define GRPH_RED_SEL_B 2 -#define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -#define GRPH_GREEN_SEL_G 0 -#define GRPH_GREEN_SEL_B 1 -#define GRPH_GREEN_SEL_A 2 -#define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -#define GRPH_BLUE_SEL_B 0 -#define GRPH_BLUE_SEL_A 1 -#define GRPH_BLUE_SEL_R 2 -#define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -#define GRPH_ALPHA_SEL_A 0 -#define GRPH_ALPHA_SEL_R 1 -#define GRPH_ALPHA_SEL_G 2 -#define GRPH_ALPHA_SEL_B 3 - -#define GRPH_DEPTH_8BPP 0 -#define GRPH_DEPTH_16BPP 1 -#define GRPH_DEPTH_32BPP 2 - -#define GRPH_FORMAT_INDEXED 0 -#define GRPH_FORMAT_ARGB1555 0 -#define GRPH_FORMAT_ARGB565 1 -#define GRPH_FORMAT_ARGB4444 2 -#define GRPH_FORMAT_AI88 3 -#define GRPH_FORMAT_MONO16 4 -#define GRPH_FORMAT_BGRA5551 5 -#define GRPH_FORMAT_ARGB8888 0 -#define GRPH_FORMAT_ARGB2101010 1 -#define GRPH_FORMAT_32BPP_DIG 2 -#define GRPH_FORMAT_8B_ARGB2101010 3 -#define GRPH_FORMAT_BGRA1010102 4 -#define GRPH_FORMAT_8B_BGRA1010102 5 -#define 
GRPH_FORMAT_RGB111110 6 -#define GRPH_FORMAT_BGR101111 7 - -#define GRPH_ARRAY_LINEAR_GENERAL 0 -#define GRPH_ARRAY_LINEAR_ALIGNED 1 -#define GRPH_ARRAY_1D_TILED_THIN1 2 -#define GRPH_ARRAY_2D_TILED_THIN1 4 - -#define CURSOR_EN (1 << 0) -#define CURSOR_MODE(x) (((x) & 0x3) << 8) -#define CURSOR_MONO 0 -#define CURSOR_24_1 1 -#define CURSOR_24_8_PRE_MULT 2 -#define CURSOR_24_8_UNPRE_MULT 3 -#define CURSOR_2X_MAGNIFY (1 << 16) -#define CURSOR_FORCE_MC_ON (1 << 20) -#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -#define CURSOR_URGENT_ALWAYS 0 -#define CURSOR_URGENT_1_8 1 -#define CURSOR_URGENT_1_4 2 -#define CURSOR_URGENT_3_8 3 -#define CURSOR_URGENT_1_2 4 -#define CURSOR_UPDATE_PENDING (1 << 0) -#define CURSOR_UPDATE_TAKEN (1 << 1) -#define CURSOR_UPDATE_LOCK (1 << 16) -#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define ES_AND_GS_AUTO 3 -#define RADEON_PACKET_TYPE3 3 -#define CE_PARTITION_BASE 3 -#define BUF_SWAP_32BIT (2 << 16) - #define GFX_POWER_STATUS (1 << 1) #define GFX_CLOCK_STATUS (1 << 2) #define GFX_LS_STATUS (1 << 3) -#define RLC_BUSY_STATUS (1 << 0) +#define RLC_BUSY_STATUS (1 << 0) #define RLC_PUD(x) ((x) << 0) #define RLC_PUD_MASK (0xff << 0) #define RLC_PDD(x) ((x) << 8) @@ -121,13 +40,6 @@ #define RLC_TTPD_MASK (0xff << 16) #define RLC_MSD(x) ((x) << 24) #define RLC_MSD_MASK (0xff << 24) -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define WRITE_DATA_DST_SEL(x) ((x) << 8) -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index fc61c2ea002f9..2a43ede5dff4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1592,6 +1592,7 @@ #define DMA_PACKET_POLL_REG_MEM 0xe #define 
DMA_PACKET_NOP 0xf +/* VCE */ #define VCE_STATUS 0x20004 #define VCE_VCPU_CNTL 0x20014 #define VCE_CLK_EN (1 << 0) @@ -1785,6 +1786,14 @@ #define EVERGREEN_VIEWPORT_SIZE 0x1b5d #define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 +#define GRPH_ARRAY_LINEAR_GENERAL 0 +#define GRPH_ARRAY_LINEAR_ALIGNED 1 +#define GRPH_ARRAY_1D_TILED_THIN1 2 +#define GRPH_ARRAY_2D_TILED_THIN1 4 + +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) + /* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ #define EVERGREEN_CUR_CONTROL 0x1a66 # define EVERGREEN_CURSOR_EN (1 << 0) @@ -1899,9 +1908,10 @@ #define GRPH_FORMAT_RGB111110 6 #define GRPH_FORMAT_BGR101111 7 -#define ES_AND_GS_AUTO 3 -#define BUF_SWAP_32BIT (2 << 16) - +#define GRPH_ENDIAN_NONE 0 +#define GRPH_ENDIAN_8IN16 1 +#define GRPH_ENDIAN_8IN32 2 +#define GRPH_ENDIAN_8IN64 3 #define GRPH_RED_SEL_R 0 #define GRPH_RED_SEL_G 1 #define GRPH_RED_SEL_B 2 -- GitLab From 8ae1a4eef78c09a61b62f85e2dfc128c7365d18d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Mar 2025 17:35:32 -0500 Subject: [PATCH 050/899] drm/amdgpu: add initial documentation for debugfs files Describes what debugfs files are available and what they are used for. v2: fix some typos (Mark Glines) v3: Address comments from Siqueira and Kent Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/debugfs.rst | 210 +++++++++++++++++++++++++ Documentation/gpu/amdgpu/debugging.rst | 7 + Documentation/gpu/amdgpu/index.rst | 1 + 3 files changed, 218 insertions(+) create mode 100644 Documentation/gpu/amdgpu/debugfs.rst diff --git a/Documentation/gpu/amdgpu/debugfs.rst b/Documentation/gpu/amdgpu/debugfs.rst new file mode 100644 index 0000000000000..fe7736a0b43a6 --- /dev/null +++ b/Documentation/gpu/amdgpu/debugfs.rst @@ -0,0 +1,210 @@ +============== +AMDGPU DebugFS +============== + +The amdgpu driver provides a number of debugfs files to aid in debugging +issues in the driver. 
These are usually found in +/sys/kernel/debug/dri/<num>. + +DebugFS Files +============= + +amdgpu_benchmark +---------------- + +Run benchmarks using the DMA engine the driver uses for GPU memory paging. +Write a number to the file to run the test. The results are written to the +kernel log. VRAM is on device memory (dGPUs) or carve out (APUs) and GTT +(Graphics Translation Tables) is system memory that is accessible by the GPU. +The following tests are available: + +- 1: simple test, VRAM to GTT and GTT to VRAM +- 2: simple test, VRAM to VRAM +- 3: GTT to VRAM, buffer size sweep, powers of 2 +- 4: VRAM to GTT, buffer size sweep, powers of 2 +- 5: VRAM to VRAM, buffer size sweep, powers of 2 +- 6: GTT to VRAM, buffer size sweep, common display sizes +- 7: VRAM to GTT, buffer size sweep, common display sizes +- 8: VRAM to VRAM, buffer size sweep, common display sizes + +amdgpu_test_ib +-------------- + +Read this file to run simple IB (Indirect Buffer) tests on all kernel managed +rings. IBs are command buffers usually generated by userspace applications +which are submitted to the kernel for execution on a particular GPU engine. +This just runs the simple IB tests included in the kernel. These tests +are engine specific and verify that IB submission works. + +amdgpu_discovery +---------------- + +Provides raw access to the IP discovery binary provided by the GPU. Read this +file to access the raw binary. This is useful for verifying the contents of +the IP discovery table. It is chip specific. + +amdgpu_vbios +------------ + +Provides raw access to the ROM binary image from the GPU. Read this file to +access the raw binary. This is useful for verifying the contents of the +video BIOS ROM. It is board specific. + +amdgpu_evict_gtt +---------------- + +Evict all buffers from the GTT memory pool. Read this file to evict all +buffers from this pool. + +amdgpu_evict_vram +----------------- + +Evict all buffers from the VRAM memory pool.
Read this file to evict all +buffers from this pool. + +amdgpu_gpu_recover +------------------ + +Trigger a GPU reset. Read this file to trigger a reset of the entire GPU. +All work currently running on the GPU will be lost. + +amdgpu_ring_<name> +------------------ + +Provides read access to the kernel managed ring buffers for each ring <name>. +These are useful for debugging problems on a particular ring. The ring buffer +is how the CPU sends commands to the GPU. The CPU writes commands into the +buffer and then asks the GPU engine to process it. This is the raw binary +contents of the ring buffer. Use a tool like UMR to decode the rings into human +readable form. + +amdgpu_mqd_<name> +----------------- + +Provides read access to the kernel managed MQD (Memory Queue Descriptor) for +ring <name> managed by the kernel driver. MQDs define the features of the ring +and are used to store the ring's state when it is not connected to hardware. +The driver writes the requested ring features and metadata (GPU addresses of +the ring itself and associated buffers) to the MQD and the firmware uses the MQD +to populate the hardware when the ring is mapped to a hardware slot. Only +available on engines which use MQDs. This provides access to the raw MQD +binary. + +amdgpu_error_<name> +------------------- + +Provides an interface to set an error code on the dma fences associated with +ring <name>. The error code specified is propagated to all fences associated +with the ring. Use this to inject a fence error into a ring. + +amdgpu_pm_info +-------------- + +Provides human readable information about the power management features +and state of the GPU. This includes current GFX clock, Memory clock, +voltages, average SoC power, temperature, GFX load, Memory load, SMU +feature mask, VCN power state, clock and power gating features. + +amdgpu_firmware_info +-------------------- + +Lists the firmware versions for all firmwares used by the GPU. Only +entries with a non-0 version are valid.
If the version is 0, the firmware +is not valid for the GPU. + +amdgpu_fence_info +----------------- + +Shows the last signalled and emitted fence sequence numbers for each +kernel driver managed ring. Fences are associated with submissions +to the engine. Emitted fences have been submitted to the ring +and signalled fences have been signalled by the GPU. Rings with a +larger emitted fence value have outstanding work that is still being +processed by the engine that owns that ring. When the emitted and +signalled fence values are equal, the ring is idle. + +amdgpu_gem_info +--------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated. This lists the buffer size, pool (VRAM, GTT, etc.), and buffer +attributes (CPU access required, CPU cache attributes, etc.). + +amdgpu_vm_info +-------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated as well as the status of those buffers relative to that process' +GPU virtual address space (e.g., evicted, idle, invalidated, etc.). + +amdgpu_sa_info +-------------- + +Prints out all of the suballocations (sa) by the suballocation manager in the +kernel driver. Prints the GPU address, size, and fence info associated +with each suballocation. The suballocations are used internally within +the kernel driver for various things. + +amdgpu_<pool>_mm +---------------- + +Prints TTM information about the memory pool <pool>. + +amdgpu_vram +----------- + +Provides direct access to VRAM. Used by tools like UMR to inspect +objects in VRAM. + +amdgpu_iomem +------------ + +Provides direct access to GTT memory. Used by tools like UMR to inspect +GTT memory. + +amdgpu_regs_* +------------- + +Provides direct access to various register apertures on the GPU. Used +by tools like UMR to access GPU registers. + +amdgpu_regs2 +------------ + +Provides an IOCTL interface used by UMR for interacting with GPU registers.
+ + +amdgpu_sensors +-------------- + +Provides an interface to query GPU power metrics (temperature, average +power, etc.). Used by tools like UMR to query GPU power metrics. + + +amdgpu_gca_config +----------------- + +Provides an interface to query GPU details (Graphics/Compute Array config, +PCI config, GPU family, etc.). Used by tools like UMR to query GPU details. + +amdgpu_wave +----------- + +Used to query GFX/compute wave information from the hardware. Used by tools +like UMR to query GFX/compute wave information. + +amdgpu_gpr +---------- + +Used to query GFX/compute GPR (General Purpose Register) information from the +hardware. Used by tools like UMR to query GPRs when debugging shaders. + +amdgpu_gprwave +-------------- + +Provides an IOCTL interface used by UMR for interacting with shader waves. + +amdgpu_fw_attestation +--------------------- + +Provides an interface for reading back firmware attestation records. diff --git a/Documentation/gpu/amdgpu/debugging.rst b/Documentation/gpu/amdgpu/debugging.rst index e75f97d0e4eaf..7cbfea0606e15 100644 --- a/Documentation/gpu/amdgpu/debugging.rst +++ b/Documentation/gpu/amdgpu/debugging.rst @@ -2,6 +2,13 @@ GPU Debugging =============== +General Debugging Options +========================= + +The DebugFS section provides documentation on a number of files to aid in debugging +issues on the GPU. + + GPUVM Debugging =============== diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 302d039928ee8..4c75567854cb2 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -16,5 +16,6 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
thermal driver-misc debugging + debugfs process-isolation amdgpu-glossary -- GitLab From 60d4952d8908c2396e5a005e056423c578e29db6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Mar 2025 10:07:50 -0400 Subject: [PATCH 051/899] drm/amdgpu: drop some dead code Drop the cgs smu firmware code for SI, it's not used. The smu firmware fetching for SI is done in si_dpm.c. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 61 ------------------------- 1 file changed, 61 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 525e53c94f4f5..004a6a9d6b9fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -252,67 +252,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { - case CHIP_TAHITI: - strscpy(fw_name, "radeon/tahiti_smc.bin"); - break; - case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/pitcairn_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/pitcairn_smc.bin"); - } - break; - case CHIP_VERDE: - if (((adev->pdev->device == 0x6820) && - ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83))) || - ((adev->pdev->device == 0x6821) && - ((adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87))) || - ((adev->pdev->revision == 0x87) && - ((adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682b)))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/verde_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/verde_smc.bin"); - } - break; - case CHIP_OLAND: - if (((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6600) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605) || - (adev->pdev->device == 0x6610))) || - ((adev->pdev->revision == 
0x83) && - (adev->pdev->device == 0x6610))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/oland_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/oland_smc.bin"); - } - break; - case CHIP_HAINAN: - if (((adev->pdev->revision == 0x81) && - (adev->pdev->device == 0x6660)) || - ((adev->pdev->revision == 0x83) && - ((adev->pdev->device == 0x6660) || - (adev->pdev->device == 0x6663) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/hainan_k_smc.bin"); - } else if ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665)) { - info->is_kicker = true; - strscpy(fw_name, "radeon/banks_k_2_smc.bin"); - } else { - strscpy(fw_name, "radeon/hainan_smc.bin"); - } - break; case CHIP_BONAIRE: if ((adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || -- GitLab From 9eab24532691a36ce795f192ea2fb3193bc5c4b3 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 26 Mar 2025 12:53:01 +0530 Subject: [PATCH 052/899] drm/amdgpu/gfx10: Add Cleaner Shader Support for GFX10.3.x GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable the cleaner shader for other GFX10.3.x series of GPUs to provide data isolation between GPU workloads. The cleaner shader is responsible for clearing the Local Data Store (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs), which helps prevent data leakage and ensures accurate computation results. This update extends cleaner shader support to GFX10.3.x GPUs, previously available for GFX10.3.0. It enhances security by clearing GPU memory between processes and maintains a consistent GPU state across KGD and KFD workloads. 
Cc: Mario Sopena-Novales Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a63ce747863f1..d1c04de0f633d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4810,7 +4810,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } break; case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; @@ -4826,6 +4828,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(10, 3, 6): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 14 && + adev->gfx.pfp_fw_version >= 17 && + adev->gfx.mec_fw_version >= 24) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 7): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 4 && + adev->gfx.pfp_fw_version >= 9 && + adev->gfx.mec_fw_version >= 12) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: 
adev->gfx.enable_cleaner_shader = false; break; -- GitLab From 0d47bb77b505e539a99f4f032c9c5a58f1d21e18 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 13:34:33 -0400 Subject: [PATCH 053/899] drm/amdgpu/gfx: make amdgpu_gfx_me_queue_to_bit() static It's not used outside of amdgpu_gfx.c. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 72af5e5a894a2..04982b7f33a8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -74,8 +74,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, - int me, int pipe, int queue) +static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) { int bit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 87e8621887667..5d70b3ae3fe1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -550,8 +550,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, - int pipe, int queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); -- GitLab From 8f970c46b56235a3965e0965fa3fd60e7567fecc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:10:17 -0400 Subject: [PATCH 054/899] drm/amdgpu/gfx: decouple the number of kgqs from the hw 
The driver currently sets up one kgq per pipe. As such adev->gfx.me.num_queue_per_pipe is hardcoded to 1 everywhere. This is fine for kernel queues, but when we enable user queues we need to know the actual number of queues per pipe. Decouple the kgq setup from the actual hardware count. For dev core dumps and user queues, we want to know the actual number of queues per pipe. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 13 +++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 04982b7f33a8a..f64675b2ab752 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -77,11 +77,12 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue) { + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ int bit = 0; bit += me * adev->gfx.me.num_pipe_per_me - * adev->gfx.me.num_queue_per_pipe; - bit += pipe * adev->gfx.me.num_queue_per_pipe; + * num_queue_per_pipe; + bit += pipe * num_queue_per_pipe; bit += queue; return bit; @@ -238,8 +239,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); - int max_queues_per_me = adev->gfx.me.num_pipe_per_me * - adev->gfx.me.num_queue_per_pipe; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe; if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage @@ -247,9 +248,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) for (i = 0; i < 
max_queues_per_me; i++) { pipe = i % adev->gfx.me.num_pipe_per_me; queue = (i / adev->gfx.me.num_pipe_per_me) % - adev->gfx.me.num_queue_per_pipe; + num_queue_per_pipe; - set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + set_bit(pipe * num_queue_per_pipe + queue, adev->gfx.me.queue_bitmap); } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d1c04de0f633d..cba9b973f0a76 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4752,6 +4752,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -4916,7 +4917,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d57db42f95360..0c9b28a460503 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1571,6 +1571,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1703,7 +1704,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 
0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index e7b58e4702927..7b20ab48f762b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1346,6 +1346,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) unsigned num_compute_rings; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1435,7 +1436,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; -- GitLab From 9cffd67e803a081ce548488161c69f2cddb97fe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:24:47 -0400 Subject: [PATCH 055/899] drm/amdgpu/gfx: assign the actual me0 queues per pipe Set the actual number of queues per pipe for ME0 (gfx). This way we will dump all of the queues properly in dev core dumps. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index cba9b973f0a76..d9c2dab17f6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4764,7 +4764,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4779,7 +4779,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c9b28a460503..c78458ecb88b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1581,7 +1581,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1594,7 +1594,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; 
adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 7b20ab48f762b..ddac26b012bca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1355,7 +1355,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; -- GitLab From 8307ebc15c1ea98a8a0b7837af1faa6c01514577 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:56:02 -0400 Subject: [PATCH 056/899] drm/amdgpu/gfx6: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9c6453b458b00..d86620f38855a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2881,8 +2881,6 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From be7652c23d833d1ab2c67b16e173b1a4e69d1ae6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:57:19 -0400 Subject: [PATCH 057/899] drm/amdgpu/gfx7: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. 
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 6eb48ebd3f4ed..0fdc4362bbc00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3909,8 +3909,6 @@ static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From c8b8d7a4f1c5cdfbd61d75302fb3e3cdefb1a7ab Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:57:34 -0400 Subject: [PATCH 058/899] drm/amdgpu/gfx8: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bfedd487efc53..fc73be4ab0685 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1248,8 +1248,6 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From a4a4c0ae6742ec7d6bf1548d2c6828de440814a0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:57:49 -0400 Subject: [PATCH 059/899] drm/amdgpu/gfx9: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. 
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d7db4cb907ae5..d725e2e230a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1649,8 +1649,6 @@ static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From 683308af030cd9b8d3f1de5cbc1ee51788878feb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:58:03 -0400 Subject: [PATCH 060/899] drm/amdgpu/gfx10: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d9c2dab17f6b2..792c31411ae3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4322,8 +4322,6 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From a9a8bccaa3ba64d509cf7df387cf0b5e1cd06499 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:58:19 -0400 Subject: [PATCH 061/899] drm/amdgpu/gfx11: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. 
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c78458ecb88b2..cedfcd1184301 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -859,8 +859,6 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- GitLab From 8f1366fcb84677293d070e31843a87c41ab8f93f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:42 -0600 Subject: [PATCH 062/899] Documentation/gpu: Add new acronyms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces some new acronyms extracted from the source code and found on some web pages around the internet (most of them came from ArchLinux, Gentoo, and Wikipedia links). Reviewed-by: Christian König Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/amdgpu-glossary.rst | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Documentation/gpu/amdgpu/amdgpu-glossary.rst b/Documentation/gpu/amdgpu/amdgpu-glossary.rst index 1e9283e076ba0..080c3f2250d18 100644 --- a/Documentation/gpu/amdgpu/amdgpu-glossary.rst +++ b/Documentation/gpu/amdgpu/amdgpu-glossary.rst @@ -12,15 +12,27 @@ we have a dedicated glossary for Display Core at The number of CUs that are active on the system. The number of active CUs may be less than SE * SH * CU depending on the board configuration. 
+ BACO + Bus Alive, Chip Off + + BOCO + Bus Off, Chip Off + CE Constant Engine + CIK + Sea Islands + CP Command Processor CPLIB Content Protection Library + CS + Command Submission + CU Compute Unit @@ -33,6 +45,9 @@ we have a dedicated glossary for Display Core at EOP End Of Pipe/Pipeline + FLR + Function Level Reset + GART Graphics Address Remapping Table. This is the name we use for the GPUVM page table used by the GPU kernel driver. It remaps system resources @@ -80,6 +95,9 @@ we have a dedicated glossary for Display Core at KCQ Kernel Compute Queue + KFD + Kernel Fusion Driver + KGQ Kernel Graphics Queue @@ -89,6 +107,9 @@ we have a dedicated glossary for Display Core at MC Memory Controller + MCBP + Mid Command Buffer Preemption + ME MicroEngine (Graphics) @@ -125,9 +146,15 @@ we have a dedicated glossary for Display Core at SE Shader Engine + SGPR + Scalar General-Purpose Registers + SH SHader array + SI + Southern Islands + SMU/SMC System Management Unit / System Management Controller @@ -146,6 +173,9 @@ we have a dedicated glossary for Display Core at TA Trusted Application + TC + Texture Cache + TOC Table of Contents @@ -158,5 +188,11 @@ we have a dedicated glossary for Display Core at VCN Video Codec Next + VGPR + Vector General-Purpose Registers + + VMID + Virtual Memory ID + VPE Video Processing Engine -- GitLab From 5acd17d6d14ec1112a6427e7c02569cba6c46f2d Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:43 -0600 Subject: [PATCH 063/899] Documentation/gpu: Change index order to show driver core first Since driver-core has an overview of the AMD GPU hardware structure, it makes more sense to keep it first. This commit moves driver-core up in the index list. 
Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 4c75567854cb2..fd47324437a0e 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -7,8 +7,8 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. .. toctree:: - module-parameters driver-core + module-parameters display/index flashing xgmi -- GitLab From c6a1c23d1041f7a5f8f259face14212c6ce4f1a8 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:44 -0600 Subject: [PATCH 064/899] Documentation/gpu: Create a documentation entry just for hardware info The APU and dGPU tables are hidden in the driver misc info, which makes it hard to find specific hardware info when users need it. This commit creates a single page for this information and adds it to the top of the amdgpu list to improve searchability. Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/amdgpu/amd-hardware-list-info.rst | 23 +++++++++++++++++++ Documentation/gpu/amdgpu/driver-misc.rst | 17 -------------- Documentation/gpu/amdgpu/index.rst | 1 + 3 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 Documentation/gpu/amdgpu/amd-hardware-list-info.rst diff --git a/Documentation/gpu/amdgpu/amd-hardware-list-info.rst b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst new file mode 100644 index 0000000000000..1786544fe7c11 --- /dev/null +++ b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst @@ -0,0 +1,23 @@ +================================================= + AMD Hardware Components Information per Product +================================================= + +On this page, you can find the AMD product name and which component version is +part of it. + +Accelerated Processing Units (APU) Info +--------------------------------------- + +.. 
csv-table:: + :header-rows: 1 + :widths: 3, 2, 2, 1, 1, 1, 1 + :file: ./apu-asic-info-table.csv + +Discrete GPU Info +----------------- + +.. csv-table:: + :header-rows: 1 + :widths: 3, 2, 2, 1, 1, 1 + :file: ./dgpu-asic-info-table.csv + diff --git a/Documentation/gpu/amdgpu/driver-misc.rst b/Documentation/gpu/amdgpu/driver-misc.rst index e40e15f89fd33..25b0c857816ea 100644 --- a/Documentation/gpu/amdgpu/driver-misc.rst +++ b/Documentation/gpu/amdgpu/driver-misc.rst @@ -50,23 +50,6 @@ board_info .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c :doc: board_info -Accelerated Processing Units (APU) Info ---------------------------------------- - -.. csv-table:: - :header-rows: 1 - :widths: 3, 2, 2, 1, 1, 1, 1 - :file: ./apu-asic-info-table.csv - -Discrete GPU Info ------------------ - -.. csv-table:: - :header-rows: 1 - :widths: 3, 2, 2, 1, 1, 1 - :file: ./dgpu-asic-info-table.csv - - GPU Memory Usage Information ============================ diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index fd47324437a0e..1fdc3ccb61c47 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -8,6 +8,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. .. toctree:: driver-core + amd-hardware-list-info module-parameters display/index flashing -- GitLab From 4ede6d20047ae13855a49969b48c438d1a2ac054 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:45 -0600 Subject: [PATCH 065/899] Documentation/gpu: Add explanation about AMD Pipes and Queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pipes and Queues are two common vocabulary that pervades discussions around amdgpu core features. The definition and explanation of those components are spread around multiple places in the code, mailing list, and Gitlab, which sometimes leads to the wrong interpretation of these concepts. 
This commit attempts to centralize the definition and explanation of Pipe and Queue from amdgpu perspective in a kernel doc. Most of the information in this doc was derived from: - https://lore.kernel.org/amd-gfx/CADnq5_Pcz2x4aJzKbVrN3jsZhD6sTydtDw=6PaN4O3m4t+Grtg@mail.gmail.com/T/#m9a670b55ab20e0f7c46c80f802a0a4be255a719d - https://gitlab.freedesktop.org/mesa/mesa/-/issues/11759 Reviewed-by: Christian König Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/driver-core.rst | 50 + .../gpu/amdgpu/pipe_and_queue_abstraction.svg | 1279 +++++++++++++++++ 2 files changed, 1329 insertions(+) create mode 100644 Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 32723a925377e..d8500c9c961e8 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -98,6 +98,56 @@ RLC (RunList Controller) The name is a vestige of old hardware where it was originally added and doesn't really have much relation to what the engine does now. + +GFX, Compute, and SDMA Overall Behavior +======================================= + +.. note:: For simplicity, whenever the term block is used in this section, it + means GFX, Compute, and SDMA. + +GFX, Compute and SDMA share a similar form of operation that can be abstracted +to facilitate understanding of the behavior of these blocks. See the figure +below illustrating the common components of these blocks: + +.. kernel-figure:: pipe_and_queue_abstraction.svg + +In the central part of this figure, you can see two hardware elements, one called +**Pipes** and another called **Queues**; it is important to highlight that Queues +must be associated with a Pipe and vice-versa. Every specific hardware IP may have +a different number of Pipes and, in turn, a different number of Queues; for +example, GFX 11 has two Pipes and two Queues per Pipe for the GFX front end. 
+ +Pipe is the hardware that processes the instructions available in the Queues; +in other words, it is a thread executing the operations inserted in the Queue. +One crucial characteristic of Pipes is that they can only execute one Queue at +a time; no matter if the hardware has multiple Queues in the Pipe, it only runs +one Queue per Pipe. + +Pipes have the mechanics of swapping between queues at the hardware level. +Nonetheless, they only make use of Queues that are considered mapped. Pipes can +switch between queues based on any of the following inputs: + +1. Command Stream; +2. Packet by Packet; +3. Other hardware requests the change (e.g., MES). + +Queues within Pipes are defined by the Hardware Queue Descriptors (HQD). +Associated with the HQD concept, we have the Memory Queue Descriptor (MQD), +which is responsible for storing information about the state of each of the +available Queues in the memory. The state of a Queue contains information such +as the GPU virtual address of the queue itself, save areas, doorbell, etc. The +MQD also stores the HQD registers, which are vital for activating or +deactivating a given Queue. The scheduling firmware (e.g., MES) is responsible +for loading HQDs from MQDs and vice versa. + +The Queue-switching process can also happen with the firmware requesting the +preemption or unmapping of a Queue. The firmware waits for the HQD_ACTIVE bit +to change to low before saving the state into the MQD. To make a different +Queue become active, the firmware copies the MQD state into the HQD registers +and loads any additional state. Finally, it sets the HQD_ACTIVE bit to high to +indicate that the queue is active. The Pipe will then execute work from active +Queues. 
+ Driver Structure ================ diff --git a/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg new file mode 100644 index 0000000000000..0df3c6b3000b3 --- /dev/null +++ b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg @@ -0,0 +1,1279 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[0] + MQD + Queue[0] + Queue[n] + ... + + + + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[1] + Queue[0] + Queue[n] + ... + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[n] + Queue[0] + Queue[n] + ... + + ... + + Hardware Block + EXECUTION + + Memory + + + + + + + + e.g.,:queue[0] + + + + + e.g.,:queue[4] + + + + + e.g.,:queue[n] + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + Registers + + MQD + MQD + MQD + MQD + MQD + ... + + + HQD RegistersQueue Address in the GPUDoorbell... + + SWITCH QUEUE:WAIT FOR HQD_ACTIVE = 0SAVE QUEUE STATE TO THE MQDCOPY NEW MQD STATESET HQD_ACTIVE = 1 + + Firmware + + + + -- GitLab From e7aaa5fbf4fc1aa9c348075aab9bf054727f025f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:46 -0600 Subject: [PATCH 066/899] Documentation/gpu: Create a GC entry in the amdgpu documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GC is a large block that plays a vital role for amdgpu; for this reason, this commit creates one specific page for GC and adds extra information about the CP component. 
Acked-by: Christian König Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/driver-core.rst | 30 ++------------- Documentation/gpu/amdgpu/gc/index.rst | 47 ++++++++++++++++++++++++ Documentation/gpu/amdgpu/index.rst | 1 + 3 files changed, 52 insertions(+), 26 deletions(-) create mode 100644 Documentation/gpu/amdgpu/gc/index.rst diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index d8500c9c961e8..2c7f7299d7ad4 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -67,38 +67,16 @@ GC (Graphics and Compute) This is the graphics and compute engine, i.e., the block that encompasses the 3D pipeline and and shader blocks. This is by far the largest block on the GPU. The 3D pipeline has tons of sub-blocks. In - addition to that, it also contains the CP microcontrollers (ME, PFP, - CE, MEC) and the RLC microcontroller. It's exposed to userspace for - user mode drivers (OpenGL, Vulkan, OpenCL, etc.) + addition to that, it also contains the CP microcontrollers (ME, PFP, CE, + MEC) and the RLC microcontroller. It's exposed to userspace for user mode + drivers (OpenGL, Vulkan, OpenCL, etc.). More details in :ref:`Graphics (GFX) + and Compute <amdgpu-gc>`. VCN (Video Core Next) This is the multi-media engine. It handles video and image encode and decode. It's exposed to userspace for user mode drivers (VA-API, OpenMAX, etc.) -Graphics and Compute Microcontrollers -------------------------------------- - -CP (Command Processor) - The name for the hardware block that encompasses the front end of the - GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers - (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers - provides the driver interface to interact with the GFX/Compute engine. - - MEC (MicroEngine Compute) - This is the microcontroller that controls the compute queues on the - GFX/compute engine. 
- - MES (MicroEngine Scheduler) - This is a new engine for managing queues. This is currently unused. - -RLC (RunList Controller) - This is another microcontroller in the GFX/Compute engine. It handles - power management related functionality within the GFX/Compute engine. - The name is a vestige of old hardware where it was originally added - and doesn't really have much relation to what the engine does now. - - GFX, Compute, and SDMA Overall Behavior ======================================= diff --git a/Documentation/gpu/amdgpu/gc/index.rst b/Documentation/gpu/amdgpu/gc/index.rst new file mode 100644 index 0000000000000..65ea93a3fbbe4 --- /dev/null +++ b/Documentation/gpu/amdgpu/gc/index.rst @@ -0,0 +1,47 @@ +.. _amdgpu-gc: + +======================================== + drm/amdgpu - Graphics and Compute (GC) +======================================== + +The relationship between the CPU and GPU can be described as the +producer-consumer problem, where the CPU fills out a buffer with operations +(producer) to be executed by the GPU (consumer). The requested operations in +the buffer are called Command Packets, which can be summarized as a compressed +way of transmitting command information to the graphics controller. + +The component that acts as the front end between the CPU and the GPU is called +the Command Processor (CP). This component is responsible for providing greater +flexibility to the GC since CP makes it possible to program various aspects of +the GPU pipeline. CP also coordinates the communication between the CPU and GPU +via a mechanism named **Ring Buffers**, where the CPU appends information to +the buffer while the GPU removes operations. It is relevant to highlight that a +CPU can add a pointer to the Ring Buffer that points to another region of +memory outside the Ring Buffer, and CP can handle it; this mechanism is called +**Indirect Buffer (IB)**. 
CP receives and parses the Command Streams (CS), and +writes the operations to the correct hardware blocks. + +Graphics (GFX) and Compute Microcontrollers +------------------------------------------- + +GC is a large block, and as a result, it has multiple firmware associated with +it. Some of them are: + +CP (Command Processor) + The name for the hardware block that encompasses the front end of the + GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers + (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers + provides the driver interface to interact with the GFX/Compute engine. + + MEC (MicroEngine Compute) + This is the microcontroller that controls the compute queues on the + GFX/compute engine. + + MES (MicroEngine Scheduler) + This is the engine for managing queues. + +RLC (RunList Controller) + This is another microcontroller in the GFX/Compute engine. It handles + power management related functionality within the GFX/Compute engine. + The name is a vestige of old hardware where it was originally added + and doesn't really have much relation to what the engine does now. diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 1fdc3ccb61c47..bb2894b5edaf2 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -10,6 +10,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. driver-core amd-hardware-list-info module-parameters + gc/index display/index flashing xgmi -- GitLab From 74f0ff369f8a4f44082a71049a15d7da2e73ff7c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 25 Mar 2025 11:18:47 -0600 Subject: [PATCH 067/899] Documentation/gpu: Add an intro about MES MES is an important firmware that lacks some essential documentation. This commit introduces an overview of it and how it works. 
Reviewed-by: Bagas Sanjaya Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/driver-core.rst | 2 ++ Documentation/gpu/amdgpu/gc/index.rst | 7 ++++- Documentation/gpu/amdgpu/gc/mes.rst | 38 ++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Documentation/gpu/amdgpu/gc/mes.rst diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 2c7f7299d7ad4..7e3f5d1e9aaf4 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -77,6 +77,8 @@ VCN (Video Core Next) decode. It's exposed to userspace for user mode drivers (VA-API, OpenMAX, etc.) +.. _pipes-and-queues-description: + GFX, Compute, and SDMA Overall Behavior ======================================= diff --git a/Documentation/gpu/amdgpu/gc/index.rst b/Documentation/gpu/amdgpu/gc/index.rst index 65ea93a3fbbe4..ff6e9ef5cbeec 100644 --- a/Documentation/gpu/amdgpu/gc/index.rst +++ b/Documentation/gpu/amdgpu/gc/index.rst @@ -38,10 +38,15 @@ CP (Command Processor) GFX/compute engine. MES (MicroEngine Scheduler) - This is the engine for managing queues. + This is the engine for managing queues. For more details check + :ref:`MicroEngine Scheduler (MES) <amdgpu-mes>`. RLC (RunList Controller) This is another microcontroller in the GFX/Compute engine. It handles power management related functionality within the GFX/Compute engine. The name is a vestige of old hardware where it was originally added and doesn't really have much relation to what the engine does now. + +.. toctree:: + + mes.rst diff --git a/Documentation/gpu/amdgpu/gc/mes.rst b/Documentation/gpu/amdgpu/gc/mes.rst new file mode 100644 index 0000000000000..b99eb211b1797 --- /dev/null +++ b/Documentation/gpu/amdgpu/gc/mes.rst @@ -0,0 +1,38 @@ +.. _amdgpu-mes: + +============================= + MicroEngine Scheduler (MES) +============================= + +.. 
note:: + Queue and ring buffer are used as synonyms. + +.. note:: + This section assumes that you are familiar with the concept of Pipes, Queues, and GC. + If not, check :ref:`GFX, Compute, and SDMA Overall Behavior<pipes-and-queues-description>` + and :ref:`drm/amdgpu - Graphics and Compute (GC) <amdgpu-gc>`. + +Every GFX has a pipe component with one or more hardware queues. Pipes can +switch between queues depending on certain conditions, and one of the +components that can request a queue switch to a pipe is the MicroEngine +Scheduler (MES). Whenever the driver is initialized, it creates one MQD per +hardware queue, and then the MQDs are handed to the MES firmware for mapping +to: + +1. Kernel Queues (legacy): This queue is statically mapped to HQDs and never + preempted. Even though this is a legacy feature, it is the current default, and + most existing hardware supports it. When an application submits work to the + kernel driver, it submits all of the application command buffers to the kernel + queues. The CS IOCTL takes the command buffer from the applications and + schedules them on the kernel queue. + +2. User Queues: These queues are dynamically mapped to the HQDs. Regarding the + utilization of User Queues, the userspace application will create its user + queues and submit work directly to its user queues with no need to IOCTL for + each submission and no need to share a single kernel queue. + +In terms of User Queues, MES can dynamically map them to the HQD. If there are +more MQDs than HQDs, the MES firmware will preempt other user queues to make +sure each queue gets a time slice; in other words, MES is a microcontroller +that handles the mapping and unmapping of MQDs into HQDs, as well as the +priorities and oversubscription of MQDs. -- GitLab From aa52eb6d1672cd75507d235a9f6e1b93fb5d3bb4 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Thu, 13 Feb 2025 15:08:37 +0800 Subject: [PATCH 068/899] Documentation: Remove repeated word in docs Remove the repeated word "the" in docs. 
Signed-off-by: Charles Han Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/display/dc-debug.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst index 013f63b271f35..605dca21f4aed 100644 --- a/Documentation/gpu/amdgpu/display/dc-debug.rst +++ b/Documentation/gpu/amdgpu/display/dc-debug.rst @@ -154,7 +154,7 @@ of the display parameters, but the userspace might also cause this issue. One way to identify the source of the problem is to take a screenshot or make a desktop video capture when the problem happens; after checking the screenshot/video recording, if you don't see any of the artifacts, it means -that the issue is likely on the the driver side. If you can still see the +that the issue is likely on the driver side. If you can still see the problem in the data collected, it is an issue that probably happened during rendering, and the display code just got the framebuffer already corrupted. 
-- GitLab From 1189c4fb6f4ca6b1af83573843451443496ffe3c Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 17 Mar 2025 14:37:37 +0800 Subject: [PATCH 069/899] drm/amd/pm: Expose smu_v13_0_6 caps Expose smu_v13_0_6 caps by moving it to common header Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 22 ++----------------- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h | 18 +++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 9264dc33ee7e9..f8489ebbd2ad9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -101,24 +101,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -#define SMU_CAP(x) SMU_13_0_6_CAPS_##x - -enum smu_v13_0_6_caps { - SMU_CAP(DPM), - SMU_CAP(DPM_POLICY), - SMU_CAP(OTHER_END_METRICS), - SMU_CAP(SET_UCLK_MAX), - SMU_CAP(PCIE_METRICS), - SMU_CAP(MCA_DEBUG_MODE), - SMU_CAP(PER_INST_METRICS), - SMU_CAP(CTF_LIMIT), - SMU_CAP(RMA_MSG), - SMU_CAP(ACA_SYND), - SMU_CAP(SDMA_RESET), - SMU_CAP(STATIC_METRICS), - SMU_CAP(ALL), -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -299,8 +281,8 @@ static inline void smu_v13_0_6_cap_clear(struct smu_context *smu, dpm_context->caps &= ~BIT_ULL(cap); } -static inline bool smu_v13_0_6_cap_supported(struct smu_context *smu, - enum smu_v13_0_6_caps cap) +bool smu_v13_0_6_cap_supported(struct smu_context *smu, + enum smu_v13_0_6_caps cap) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 83745909e5644..f28b1401fc76c 100644 --- 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -26,6 +26,7 @@ #define SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL 0x2 #define SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL 0x4 #define SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL 0x2 +#define SMU_CAP(x) SMU_13_0_6_CAPS_##x typedef enum { /*0*/ METRICS_VERSION_V0 = 0, @@ -51,6 +52,23 @@ struct PPTable_t { bool Init; }; +enum smu_v13_0_6_caps { + SMU_CAP(DPM), + SMU_CAP(DPM_POLICY), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(PCIE_METRICS), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(PER_INST_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND), + SMU_CAP(SDMA_RESET), + SMU_CAP(STATIC_METRICS), + SMU_CAP(ALL), +}; + extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); #endif -- GitLab From 084769f493a60ac909ad8e9cbcc14122d9c28484 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 17 Mar 2025 14:55:38 +0800 Subject: [PATCH 070/899] drm/amd/pm: Use gpu_metrics_v1_8 for smu_v13_0_6 Use gpu_metrics_v1_8 for smu_v13_0_6 to fill metrics data v2: Move exposing caps to separate patch, move smu_v13.0.12 gpu metrics 1.8 usage to separate patch (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 22 +++++++++++++++---- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f8489ebbd2ad9..4f767948b1727 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -507,7 +507,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct 
gpu_metrics_v1_7); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -2468,8 +2468,8 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; @@ -2495,7 +2495,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); @@ -2648,6 +2648,20 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusyAcc, version)[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index f28b1401fc76c..2a8f42dbd9f57 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -65,6 +65,7 @@ enum smu_v13_0_6_caps { SMU_CAP(ACA_SYND), SMU_CAP(SDMA_RESET), SMU_CAP(STATIC_METRICS), + SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(ALL), }; -- GitLab From 7ac66f9355457e16ea5876cd9192b680f474b316 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 17 Mar 2025 15:03:46 +0800 Subject: [PATCH 071/899] drm/amd/pm: Use gpu_metrics_v1_8 for smu_v13_0_12 Use gpu_metrics_v1_8 for smu_v13_0_12 to fill metrics data Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 16 +++++++++++++--- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 238bd71baa6dd..5d4437e413cc2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -345,8 +345,8 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; u8 num_jpeg_rings_gpu_metrics; @@ -357,7 +357,7 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); memcpy(metrics, smu_table->metrics_table, sizeof(MetricsTable_t)); - 
smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(metrics->MaxSocketTemperature); @@ -474,6 +474,16 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) SMUQ10_ROUND(metrics->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(metrics->GfxBusyAcc[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 4f767948b1727..8900c94bda0ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -335,6 +335,7 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver >= 0x00561E00) smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + } static void smu_v13_0_6_init_caps(struct smu_context *smu) -- GitLab From 0f774fce4499d30378372862b22490673ca5821d Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 11 Mar 2025 15:43:07 -0400 Subject: [PATCH 072/899] drm/amd/display: convert DRM_ERROR to drm_err in hpd_rx_irq_create_workqueue() pass in a pointer to amdgpu_device directly to the function. 
Reviewed-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8df93d23b723d..8df1f94d25aa7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1563,8 +1563,9 @@ skip: } -static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc) +static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct amdgpu_device *adev) { + struct dc *dc = adev->dm.dc; int max_caps = dc->caps.max_links; int i = 0; struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL; @@ -1580,7 +1581,7 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq"); if (hpd_rx_offload_wq[i].wq == NULL) { - DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!"); + drm_err(adev_to_drm(adev), "create amdgpu_dm_hpd_rx_offload_wq fail!"); goto out_err; } @@ -2146,7 +2147,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc); + adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n"); goto error; -- GitLab From 93717be16e99ca4e424e2d2f636b8763777f7472 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 11 Mar 2025 15:51:03 -0400 Subject: [PATCH 073/899] drm/amd/display: use drm_err in hpd rx offload add amdgpu_device pointer to data associated with the work struct such that hpd handlers has access to the drm device for use with drm_err() Reviewed-by: Alex Hung Signed-off-by: Aurabindo 
Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++++++--------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 4 ++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8df1f94d25aa7..af80baeec707a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1483,18 +1483,18 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work) offload_work = container_of(work, struct hpd_rx_irq_offload_work, work); aconnector = offload_work->offload_wq->aconnector; + adev = offload_work->adev; if (!aconnector) { - DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work"); + drm_err(adev_to_drm(adev), "Can't retrieve aconnector in hpd_rx_irq_offload_work"); goto skip; } - adev = drm_to_adev(aconnector->base.dev); dc_link = aconnector->dc_link; mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); mutex_unlock(&aconnector->hpd_lock); if (new_connection_type == dc_connection_none) @@ -3941,20 +3941,21 @@ static void handle_hpd_irq(void *param) } -static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq, +static void schedule_hpd_rx_offload_work(struct amdgpu_device *adev, struct hpd_rx_irq_offload_work_queue *offload_wq, union hpd_irq_data hpd_irq_data) { struct hpd_rx_irq_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); if (!offload_work) { - DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate hpd_rx_irq_offload_work.\n"); return; } INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work); 
offload_work->data = hpd_irq_data; offload_work->offload_wq = offload_wq; + offload_work->adev = adev; queue_work(offload_wq->wq, &offload_work->work); DRM_DEBUG_KMS("queue work to handle hpd_rx offload work"); @@ -3996,7 +3997,7 @@ static void handle_hpd_rx_irq(void *param) goto out; if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4018,7 +4019,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4035,7 +4036,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4045,7 +4046,7 @@ out: if (result && !is_mst_root_connector) { /* Downstream Port status changed. 
*/ if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(dc_link); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 385faaca6e26a..740ff0b1fc130 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -276,6 +276,10 @@ struct hpd_rx_irq_offload_work { * @offload_wq: offload work queue that this work is queued to */ struct hpd_rx_irq_offload_work_queue *offload_wq; + /** + * @adev: amdgpu_device pointer + */ + struct amdgpu_device *adev; }; /** -- GitLab From 769e07136a298afe96372e55ded61f5e3fdd6bab Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 11 Mar 2025 15:55:55 -0400 Subject: [PATCH 074/899] drm/amd/display: use drm_err in create_validate_stream_for_sink() make the drm device available in create_validate_stream_for_sink() so that drm_err() can be used Reviewed-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index af80baeec707a..f4c743de7def3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6535,7 +6535,7 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } static struct dc_sink * -create_fake_sink(struct dc_link *link) +create_fake_sink(struct drm_device *dev, struct dc_link *link) { struct dc_sink_init_data sink_init_data = { 0 }; struct dc_sink *sink = NULL; @@ -6545,7 +6545,7 @@ 
create_fake_sink(struct dc_link *link) sink = dc_sink_create(&sink_init_data); if (!sink) { - DRM_ERROR("Failed to create sink!\n"); + drm_err(dev, "Failed to create sink!\n"); return NULL; } sink->sink_signal = SIGNAL_TYPE_VIRTUAL; @@ -6901,6 +6901,7 @@ create_stream_for_sink(struct drm_connector *connector, const struct dc_stream_state *old_stream, int requested_bpc) { + struct drm_device *dev = connector->dev; struct amdgpu_dm_connector *aconnector = NULL; struct drm_display_mode *preferred_mode = NULL; const struct drm_connector_state *con_state = &dm_state->base; @@ -6924,7 +6925,7 @@ create_stream_for_sink(struct drm_connector *connector, memset(&saved_mode, 0, sizeof(saved_mode)); if (connector == NULL) { - DRM_ERROR("connector is NULL!\n"); + drm_err(dev, "connector is NULL!\n"); return stream; } @@ -6942,7 +6943,7 @@ create_stream_for_sink(struct drm_connector *connector, } if (!aconnector || !aconnector->dc_sink) { - sink = create_fake_sink(link); + sink = create_fake_sink(dev, link); if (!sink) return stream; @@ -6954,7 +6955,7 @@ create_stream_for_sink(struct drm_connector *connector, stream = dc_create_stream_for_sink(sink); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(dev, "Failed to create stream for sink!\n"); goto finish; } @@ -7638,7 +7639,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, dm_state, old_stream, requested_bpc); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(adev_to_drm(adev), "Failed to create stream for sink!\n"); break; } -- GitLab From 880ab14a4acace958eaaf780231ca3f60454f718 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 11 Mar 2025 15:34:53 -0400 Subject: [PATCH 075/899] drm/amd/display: convert more DRM_ERROR to drm_err prefer drm_err instead of DRM_ERROR since the former prints the associated DRM device, which is helpful when debugging multi-gpu use cases. 
Reviewed-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 207 +++++++++--------- 1 file changed, 104 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4c743de7def3..a4dfffc89f1fe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -280,7 +280,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc) acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -301,7 +301,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -772,12 +772,12 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; if (notify == NULL) { - DRM_ERROR("DMUB HPD callback notification was NULL"); + drm_err(adev_to_drm(adev), "DMUB HPD callback notification was NULL"); return; } if (notify->link_index > adev->dm.dc->link_count) { - DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index); + drm_err(adev_to_drm(adev), "DMUB HPD index (%u)is abnormal", notify->link_index); return; } @@ -871,7 +871,7 @@ static void dm_handle_hpd_work(struct work_struct *work) dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work); if (!dmub_hpd_wrk->dmub_notify) { - DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL"); + drm_err(adev_to_drm(dmub_hpd_wrk->adev), "dmub_hpd_wrk dmub_notify is NULL"); return; } @@ -935,7 +935,7 @@ static void 
dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, &notify); if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { - DRM_ERROR("DM: notify type %d invalid!", notify.type); + drm_err(adev_to_drm(adev), "DM: notify type %d invalid!", notify.type); continue; } if (!dm->dmub_callback[notify.type]) { @@ -946,14 +946,14 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) if (dm->dmub_thread_offload[notify.type] == true) { dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC); if (!dmub_hpd_wrk) { - DRM_ERROR("Failed to allocate dmub_hpd_wrk"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk"); return; } dmub_hpd_wrk->dmub_notify = kmemdup(&notify, sizeof(struct dmub_notification), GFP_ATOMIC); if (!dmub_hpd_wrk->dmub_notify) { kfree(dmub_hpd_wrk); - DRM_ERROR("Failed to allocate dmub_hpd_wrk->dmub_notify"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk->dmub_notify"); return; } INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work); @@ -1011,7 +1011,7 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) &compressor->gpu_addr, &compressor->cpu_addr); if (r) - DRM_ERROR("DM: Failed to initialize FBC\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n"); else { adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr; DRM_INFO("DM: FBC alloc %lu\n", max_size*4); @@ -1179,13 +1179,13 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; if (!fb_info) { - DRM_ERROR("No framebuffer info for DMUB service.\n"); + drm_err(adev_to_drm(adev), "No framebuffer info for DMUB service.\n"); return -EINVAL; } if (!dmub_fw) { /* Firmware required for DMUB support. 
*/ - DRM_ERROR("No firmware provided for DMUB.\n"); + drm_err(adev_to_drm(adev), "No firmware provided for DMUB.\n"); return -EINVAL; } @@ -1195,7 +1195,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error checking HW support for DMUB: %d\n", status); + drm_err(adev_to_drm(adev), "Error checking HW support for DMUB: %d\n", status); return -EINVAL; } @@ -1297,7 +1297,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_hw_init(dmub_srv, &hw_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error initializing DMUB HW: %d\n", status); + drm_err(adev_to_drm(adev), "Error initializing DMUB HW: %d\n", status); return -EINVAL; } @@ -1315,7 +1315,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) if (!adev->dm.dc->ctx->dmub_srv) adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv); if (!adev->dm.dc->ctx->dmub_srv) { - DRM_ERROR("Couldn't allocate DC DMUB server!\n"); + drm_err(adev_to_drm(adev), "Couldn't allocate DC DMUB server!\n"); return -ENOMEM; } @@ -1373,7 +1373,7 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev) /* Perform the full hardware initialization. 
*/ r = dm_dmub_hw_init(adev); if (r) - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); } } @@ -1972,7 +1972,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) mutex_init(&adev->dm.audio_lock); if (amdgpu_dm_irq_init(adev)) { - DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize DM IRQ support.\n"); goto error; } @@ -2141,7 +2141,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) r = dm_dmub_hw_init(adev); if (r) { - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); goto error; } @@ -2149,7 +2149,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { - DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd rx offload workqueue.\n"); goto error; } @@ -2164,7 +2164,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize freesync_module.\n"); } else DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n", @@ -2176,7 +2176,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.vblank_control_workqueue = create_singlethread_workqueue("dm_vblank_control_workqueue"); if (!adev->dm.vblank_control_workqueue) - DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize vblank_workqueue.\n"); } if (adev->dm.dc->caps.ips_support && @@ -2187,7 +2187,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, 
adev->dm.dc); if (!adev->dm.hdcp_workqueue) - DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize hdcp_workqueue.\n"); else DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); @@ -2203,14 +2203,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq"); if (!adev->dm.delayed_hpd_wq) { - DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd offload workqueue.\n"); goto error; } amdgpu_dm_outbox_init(adev); if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY, dmub_aux_setconfig_callback, false)) { - DRM_ERROR("amdgpu: fail to register dmub aux callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub aux callback"); goto error; } /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. 
@@ -2227,7 +2227,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } if (amdgpu_dm_initialize_drm_device(adev)) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize sw for display support.\n"); goto error; } @@ -2243,14 +2243,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { drm_err(adev_to_drm(adev), - "amdgpu: failed to initialize vblank sw for display support.\n"); + "amdgpu: failed to initialize sw for display support.\n"); goto error; } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) amdgpu_dm_crtc_secure_display_create_contexts(adev); if (!adev->dm.secure_display_ctx.crtc_ctx) - DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize secure display contexts.\n"); if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) adev->dm.secure_display_ctx.support_mul_roi = true; @@ -2430,7 +2430,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) default: break; } - DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); + drm_err(adev_to_drm(adev), "Unsupported ASIC type: 0x%X\n", adev->asic_type); return -EINVAL; } @@ -2582,7 +2582,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) dmub_srv = adev->dm.dmub_srv; if (!dmub_srv) { - DRM_ERROR("Failed to allocate DMUB service!\n"); + drm_err(adev_to_drm(adev), "Failed to allocate DMUB service!\n"); return -ENOMEM; } @@ -2595,7 +2595,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) /* Create the DMUB service. 
*/ status = dmub_srv_create(dmub_srv, &create_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error creating DMUB service: %d\n", status); + drm_err(adev_to_drm(adev), "Error creating DMUB service: %d\n", status); return -EINVAL; } @@ -2620,7 +2620,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) &region_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB region info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB region info: %d\n", status); return -EINVAL; } @@ -2649,14 +2649,14 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fb_info = adev->dm.dmub_fb_info; if (!fb_info) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "Failed to allocate framebuffer info for DMUB service!\n"); return -ENOMEM; } status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB FB info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB FB info: %d\n", status); return -EINVAL; } @@ -2673,7 +2673,7 @@ static int dm_sw_init(struct amdgpu_ip_block *ip_block) adev->dm.cgs_device = amdgpu_cgs_create_device(adev); if (!adev->dm.cgs_device) { - DRM_ERROR("amdgpu: failed to create cgs device.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create cgs device.\n"); return -EINVAL; } @@ -2979,7 +2979,7 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) ret = amdgpu_dpm_write_watermarks_table(adev); if (ret) { - DRM_ERROR("Failed to update WMTABLE!\n"); + drm_err(adev_to_drm(adev), "Failed to update WMTABLE!\n"); return ret; } @@ -4109,19 +4109,19 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) { if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if
(!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, dmub_hpd_sense_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd sense callback"); return -EINVAL; } } @@ -4142,7 +4142,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1 || int_params.irq_source > DC_IRQ_SOURCE_HPD6) { - DRM_ERROR("Failed to register hpd irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd irq!\n"); return -EINVAL; } @@ -4160,7 +4160,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1RX || int_params.irq_source > DC_IRQ_SOURCE_HPD6RX) { - DRM_ERROR("Failed to register hpd rx irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd rx irq!\n"); return -EINVAL; } @@ -4202,7 +4202,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4213,7 +4213,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4232,7 +4232,7 @@ static int 
dce60_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4243,7 +4243,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4261,7 +4261,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4303,7 +4303,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4314,7 +4314,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4332,7 +4332,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, 
&adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4343,7 +4343,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4362,7 +4362,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4373,7 +4373,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4391,7 +4391,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4441,7 +4441,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4452,7 +4452,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || 
int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4473,7 +4473,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) vrtl_int_srcid[i], &adev->vline0_irq); if (r) { - DRM_ERROR("Failed to add vline0 irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vline0 irq id!\n"); return r; } @@ -4484,7 +4484,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_DC1_VLINE0 || int_params.irq_source > DC_IRQ_SOURCE_DC6_VLINE0) { - DRM_ERROR("Failed to register vline0 irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vline0 irq!\n"); return -EINVAL; } @@ -4512,7 +4512,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4523,7 +4523,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4543,7 +4543,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) i++) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4554,7 +4554,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || 
int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4572,7 +4572,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4594,7 +4594,7 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT, &adev->dmub_outbox_irq); if (r) { - DRM_ERROR("Failed to add outbox irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add outbox irq id!\n"); return r; } @@ -5039,7 +5039,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { - DRM_ERROR("DM: Backlight registration failed!\n"); + drm_err(drm, "DM: Backlight registration failed!\n"); dm->backlight_dev[aconnector->bl_idx] = NULL; } else DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name); @@ -5056,7 +5056,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL); if (!plane) { - DRM_ERROR("KMS: Failed to allocate plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to allocate plane\n"); return -ENOMEM; } plane->type = plane_type; @@ -5074,7 +5074,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap); if (ret) { - DRM_ERROR("KMS: Failed to initialize plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to initialize plane\n"); kfree(plane); return ret; } @@ -5143,14 +5143,14 @@ static int 
amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { - DRM_ERROR("DM: Failed to initialize mode config\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize mode config\n"); return -EINVAL; } /* There is one primary plane per CRTC */ primary_planes = dm->dc->caps.max_streams; if (primary_planes > AMDGPU_MAX_PLANES) { - DRM_ERROR("DM: Plane nums out of 6 planes\n"); + drm_err(adev_to_drm(adev), "DM: Plane nums out of 6 planes\n"); return -EINVAL; } @@ -5163,7 +5163,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, mode_info, i, DRM_PLANE_TYPE_PRIMARY, plane)) { - DRM_ERROR("KMS: Failed to initialize primary plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize primary plane\n"); goto fail; } } @@ -5195,14 +5195,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, NULL, primary_planes + i, DRM_PLANE_TYPE_OVERLAY, plane)) { - DRM_ERROR("KMS: Failed to initialize overlay plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize overlay plane\n"); goto fail; } } for (i = 0; i < dm->dc->caps.max_streams; i++) if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) { - DRM_ERROR("KMS: Failed to initialize crtc\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize crtc\n"); goto fail; } @@ -5222,7 +5222,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (register_outbox_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5272,7 +5272,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } if (link_cnt > MAX_LINKS) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "KMS: Cannot support more than %d display indexes\n", MAX_LINKS); goto fail; @@ -5288,12 
+5288,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); if (!wbcon) { - DRM_ERROR("KMS: Failed to allocate writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to allocate writeback connector\n"); continue; } if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) { - DRM_ERROR("KMS: Failed to initialize writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize writeback connector\n"); kfree(wbcon); continue; } @@ -5313,12 +5313,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; if (amdgpu_dm_encoder_init(dm->ddev, aencoder, i)) { - DRM_ERROR("KMS: Failed to initialize encoder\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize encoder\n"); goto fail; } if (amdgpu_dm_connector_init(dm, aconnector, i, aencoder)) { - DRM_ERROR("KMS: Failed to initialize connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize connector\n"); goto fail; } @@ -5327,7 +5327,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) aconnector; if (!dc_link_detect_connection_type(link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(link); @@ -5364,7 +5364,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VERDE: case CHIP_OLAND: if (dce60_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5386,7 +5386,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: if (dce110_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to 
initialize IRQ\n"); goto fail; } break; @@ -5414,12 +5414,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (dcn10_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%X\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%X\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); goto fail; } @@ -5692,7 +5692,7 @@ static int dm_early_init(struct amdgpu_ip_block *ip_block) adev->mode_info.num_dig = 4; break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%x\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%x\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); return -EINVAL; } @@ -5841,7 +5841,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; break; default: - DRM_ERROR( + drm_err(adev_to_drm(adev), "Unsupported screen format %p4cc\n", &fb->format->format); return -EINVAL; @@ -7437,6 +7437,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; struct i2c_adapter *ddc; + struct drm_device *dev = connector->dev; if (dc_link && dc_link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; @@ -7446,7 +7447,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { - DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7505,7 +7506,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { 
- DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(connector->dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7714,7 +7715,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec if (dc_sink == NULL && aconnector->base.force != DRM_FORCE_ON_DIGITAL && aconnector->base.force != DRM_FORCE_ON) { - DRM_ERROR("dc_sink is NULL!\n"); + drm_err(connector->dev, "dc_sink is NULL!\n"); goto fail; } @@ -8622,7 +8623,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, i2c = create_i2c(link->ddc, false); if (!i2c) { - DRM_ERROR("Failed to create i2c adapter data\n"); + drm_err(adev_to_drm(dm->adev), "Failed to create i2c adapter data\n"); return -ENOMEM; } @@ -8630,7 +8631,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, res = i2c_add_adapter(&i2c->base); if (res) { - DRM_ERROR("Failed to register hw i2c %d\n", link->link_index); + drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index); goto out_free; } @@ -8644,7 +8645,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, &i2c->base); if (res) { - DRM_ERROR("connector_init failed\n"); + drm_err(adev_to_drm(dm->adev), "connector_init failed\n"); aconnector->connector_id = -1; goto out_free; } @@ -9229,13 +9230,13 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor attributes\n"); update->cursor_attributes = &crtc_state->stream->cursor_attributes; if (!dc_stream_set_cursor_position(crtc_state->stream, &position)) - DRM_ERROR("DC failed to set cursor position\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor position\n"); update->cursor_position = &crtc_state->stream->cursor_position; } @@ -9486,7 +9487,7 @@ static void 
amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].surface = dc_plane; if (!bundle->surface_updates[planes_count].surface) { - DRM_ERROR("No surface for CRTC: id=%d\n", + drm_err(dev, "No surface for CRTC: id=%d\n", acrtc_attach->crtc_id); continue; } @@ -10002,20 +10003,20 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL); if (!wb_info) { - DRM_ERROR("Failed to allocate wb_info\n"); + drm_err(adev_to_drm(adev), "Failed to allocate wb_info\n"); return; } acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); if (!acrtc) { - DRM_ERROR("no amdgpu_crtc found\n"); + drm_err(adev_to_drm(adev), "no amdgpu_crtc found\n"); kfree(wb_info); return; } afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); if (!afb) { - DRM_ERROR("No amdgpu_framebuffer found\n"); + drm_err(adev_to_drm(adev), "No amdgpu_framebuffer found\n"); kfree(wb_info); return; } @@ -10328,7 +10329,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); if (!dummy_updates) { - DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n"); continue; } for (j = 0; j < status->plane_count; j++) @@ -10563,7 +10564,7 @@ static int dm_force_atomic_commit(struct drm_connector *connector) out: drm_atomic_state_put(state); if (ret) - DRM_ERROR("Restoring old state failed with %i\n", ret); + drm_err(ddev, "Restoring old state failed with %i\n", ret); return ret; } @@ -10647,7 +10648,7 @@ static int do_aquire_global_lock(struct drm_device *dev, &commit->flip_done, 10*HZ); if (ret == 0) - DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n", + drm_err(dev, "[CRTC:%d:%s] hw_done or flip_done timed out\n", crtc->base.id, crtc->name); drm_crtc_commit_put(commit); @@ -11763,7 +11764,7 @@ static bool 
amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, old_plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { - DRM_ERROR("Failed to get plane state for plane %s\n", plane->name); + drm_err(dev, "Failed to get plane state for plane %s\n", plane->name); return false; } @@ -12332,7 +12333,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); if (!res) { - DRM_ERROR("EDID CEA parser failed\n"); + drm_err(adev_to_drm(dm->adev), "EDID CEA parser failed\n"); return false; } @@ -12340,7 +12341,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, if (output->type == DMUB_CMD__EDID_CEA_ACK) { if (!output->ack.success) { - DRM_ERROR("EDID CEA ack failed at offset %d\n", + drm_err(adev_to_drm(dm->adev), "EDID CEA ack failed at offset %d\n", output->ack.offset); } } else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) { @@ -12568,7 +12569,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; if (!connector->state) { - DRM_ERROR("%s - Connector has no state", __func__); + drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__); goto update; } @@ -12753,7 +12754,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( } if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); *operation_result = AUX_RET_ERROR_TIMEOUT; goto out; } @@ -12816,7 +12817,7 @@ int amdgpu_dm_process_dmub_set_config_sync( ret = 0; *operation_result = adev->dm.dmub_notify->sc_status; } else { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); ret = -1; *operation_result = SET_CONFIG_UNKNOWN_ERROR; } -- GitLab 
From 40b85a9066f16b5d4abe3f9ea1750d46de0a4fd6 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Mon, 17 Mar 2025 13:29:47 -0400 Subject: [PATCH 076/899] drm/amd/display: Set ODM Factor Based On DML Architecture [Why] Mapping of ODM enum is different for DML2.0 vs DML2.1. Configs using DML2.1 will incorrectly trigger an assert meant for DML2.0. [How] Use if/else to separate logic between DML2.0 and DML2.1. Reviewed-by: Aurabindo Pillai Signed-off-by: Austin Zheng Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_dc_resource_mgmt.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index a966abd407881..5f1b49a500495 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -1082,22 +1082,22 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s if (stream_disp_cfg_index >= disp_cfg_index_max) continue; - if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { - scratch.odm_info.odm_factor = 1; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { - scratch.odm_info.odm_factor = 2; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { - scratch.odm_info.odm_factor = 4; - } else { - ASSERT(false); - scratch.odm_info.odm_factor = 1; - } - + if (ctx->architecture == dml2_architecture_20) { + if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { + scratch.odm_info.odm_factor = 1; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { + scratch.odm_info.odm_factor = 2; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { + scratch.odm_info.odm_factor = 4; + } else { + ASSERT(false); + scratch.odm_info.odm_factor = 1; + } + } else if
(ctx->architecture == dml2_architecture_21) { /* After DML2.1 update, ODM interpretation needs to change and is no longer same as for DML2.0. * This is not an issue with new resource management logic. This block ensure backcompat * with legacy pipe management with updated DML. * */ - if (ctx->architecture == dml2_architecture_21) { if (ODMMode[stream_disp_cfg_index] == 1) { scratch.odm_info.odm_factor = 1; } else if (ODMMode[stream_disp_cfg_index] == 2) { -- GitLab From 7b9f8698796f44ac2b551d485a8e5e9aaa761812 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Fri, 14 Mar 2025 18:33:43 -0400 Subject: [PATCH 077/899] drm/amd/display: Use meaningful size for block_sequence array [Why] This array was initially defined as size 50. There were array overflow issues so the size was increased to 100. To ensure such issues are avoided in the future, the size should be set based on the possible contents instead of an arbitrary value. [How] - upper bound, assume every update occurs on max number of pipes - define array sizes for function parameters, for static analysis Reviewed-by: Dillon Varone Signed-off-by: Joshua Aberback Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 ++-- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 8 ++++++-- drivers/gpu/drm/amd/display/dc/inc/core_types.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 55b32dfbfdd63..7014b8d000bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -697,7 +697,7 @@ void get_fams2_visual_confirm_color( void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence 
block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status *stream_status, @@ -896,7 +896,7 @@ void hwss_build_fast_sequence(struct dc *dc, } void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps) { unsigned int i; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index c8b5ed834579e..3a0795045bc61 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -195,6 +195,8 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, + /* This must be the last value in this enum, add new ones above */ + HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; struct block_sequence { @@ -202,6 +204,8 @@ struct block_sequence { enum block_sequence_func func; }; +#define MAX_HWSS_BLOCK_SEQUENCE_SIZE (HWSS_BLOCK_SEQUENCE_FUNC_COUNT * MAX_PIPES) + struct hw_sequencer_funcs { void (*hardware_release)(struct dc *dc); /* Embedded Display Related */ @@ -534,13 +538,13 @@ void set_drr_and_clear_adjust_pending( struct drr_params *params); void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps); void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status *stream_status, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index c1c8742d4a58d..338bc240e8036 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -630,7 +630,7 @@ struct dc_state { */ struct bw_context bw_ctx; - struct block_sequence block_sequence[100]; + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE]; unsigned int block_sequence_steps; struct dc_dmub_cmd dc_dmub_cmd[10]; unsigned int dmub_cmd_count; -- GitLab From 7a2911b7f478f9b2ef921a5eb09f2a3a8975d9a2 Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Tue, 18 Mar 2025 09:14:47 +0800 Subject: [PATCH 078/899] drm/amd/display: Enable Replay Low Hz feature flag Enable replay low refresh rate support. Reviewed-by: ChunTao Tso Signed-off-by: Robin Chen Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 83ffaae9f4396..1f8382db8599a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1089,7 +1089,8 @@ union replay_low_refresh_rate_enable_options { struct { //BIT[0-3]: Replay Low Hz Support control unsigned int ENABLE_LOW_RR_SUPPORT :1; - unsigned int RESERVED_1_3 :3; + unsigned int SKIP_ASIC_CHECK :1; + unsigned int RESERVED_2_3 :2; //BIT[4-15]: Replay Low Hz Enable Scenarios unsigned int ENABLE_STATIC_SCREEN :1; unsigned int ENABLE_FULL_SCREEN_VIDEO :1; @@ -1129,6 +1130,8 @@ struct replay_config { union replay_low_refresh_rate_enable_options low_rr_enable_options; /* Replay coasting vtotal is within low refresh rate range. 
*/ bool low_rr_activated; + /* Replay low refresh rate supported*/ + bool low_rr_supported; }; /* Replay feature flags*/ -- GitLab From 3b258b6f521b38250e2b8cf0c80a026dd0e385c6 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 13 Mar 2025 15:24:59 -0400 Subject: [PATCH 079/899] drm/amd/display: Consider downspread against max clocks in DML2.1 [WHY&HOW] Core should evaluate support based on the max clocks after considering downspread. Reviewed-by: Austin Zheng Signed-off-by: Dillon Varone Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 6 +++--- .../amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c | 4 ++++ .../dc/dml2/dml21/src/inc/dml2_internal_shared_types.h | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 5d91f195397a2..a27409464616c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7299,9 +7299,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; - mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000; mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; - mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; + 
mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); @@ -8061,7 +8061,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); - if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) { + if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; } } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index f4b1a7d02d426..a265f254152c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -182,6 +182,10 @@ static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_ min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 
100.0)); + min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index d8d01dceacdd4..00688b9f1df4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -37,6 +37,12 @@ struct dml2_mcg_min_clock_table { unsigned int dcfclk; } max_clocks_khz; + struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dtbclk; + } max_ss_clocks_khz; + struct { unsigned int dprefclk; unsigned int xtalclk; -- GitLab From 50d6714b242e1c0a6918a211a45e0f7cfbd7c5bb Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 18 Mar 2025 17:05:50 -0400 Subject: [PATCH 080/899] drm/amd/display: use drm_info instead of DRM_INFO drm_info prints the drm device instance which is helpful when debugging multi gpu issues Reviewed-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a4dfffc89f1fe..1da749e4ddc40 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -783,7 +783,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */ if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { - DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + drm_info(adev_to_drm(adev), "Skip DMUB HPD IRQ callback in suspend/resume\n"); return; } @@ -800,9 +800,9 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, aconnector = to_amdgpu_dm_connector(connector); if (link && aconnector->dc_link == link) { if (notify->type == DMUB_NOTIFICATION_HPD) - DRM_INFO("DMUB HPD IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD IRQ callback: link_index=%u\n", link_index); else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) - DRM_INFO("DMUB HPD RX IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index); else DRM_WARN("DMUB Unknown HPD callback type %d, link_index=%u\n", notify->type, link_index); @@ -1014,7 +1014,7 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n"); else { adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr; - DRM_INFO("DM: FBC alloc %lu\n", max_size*4); + drm_info(adev_to_drm(adev), "DM: FBC alloc %lu\n", max_size*4); } } @@ -1200,7 +1200,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) } if (!has_hw_support) { - DRM_INFO("DMUB unsupported on ASIC\n"); + drm_info(adev_to_drm(adev), "DMUB unsupported on ASIC\n"); return 0; } @@ -1319,7 +1319,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return -ENOMEM; } - DRM_INFO("DMUB hardware initialized: version=0x%08X\n", + drm_info(adev_to_drm(adev), "DMUB hardware initialized: version=0x%08X\n", adev->dm.dmcub_fw_version); /* Keeping sanity checks off if @@ -2094,10 +2094,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) 
adev->dm.dc = dc_create(&init_data); if (adev->dm.dc) { - DRM_INFO("Display Core v%s initialized on %s\n", DC_VER, + drm_info(adev_to_drm(adev), "Display Core v%s initialized on %s\n", DC_VER, dce_version_to_string(adev->dm.dc->ctx->dce_version)); } else { - DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER); + drm_info(adev_to_drm(adev), "Display Core failed to initialize with v%s!\n", DC_VER); goto error; } @@ -2137,7 +2137,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.ignore_cable_id = true; if (adev->dm.dc->caps.dp_hdmi21_pcon_support) - DRM_INFO("DP-HDMI FRL PCON supported\n"); + drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n"); r = dm_dmub_hw_init(adev); if (r) { @@ -2197,7 +2197,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { - DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify"); + drm_info(adev_to_drm(adev), "amdgpu: fail to allocate adev->dm.dmub_notify"); goto error; } @@ -2573,7 +2573,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE); - DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n", + drm_info(adev_to_drm(adev), "Loading DMUB firmware via PSP: version=0x%08X\n", adev->dm.dmcub_fw_version); } @@ -10237,7 +10237,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; - DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); + drm_info(adev_to_drm(adev), "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); if (aconnector->dc_link) hdcp_update_display( -- GitLab From 16e24a95fbfcae70be71b3150f2def4847ac0a51 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai 
Date: Tue, 18 Mar 2025 17:25:16 -0400 Subject: [PATCH 081/899] drm/amd/display: use drm_warn instead of DRM_WARN drm_warn prints the drm device instance which is helpful when debugging multi gpu issues Reviewed-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1da749e4ddc40..f8847cf10dbd0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -804,7 +804,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index); else - DRM_WARN("DMUB Unknown HPD callback type %d, link_index=%u\n", + drm_warn(adev_to_drm(adev), "DMUB Unknown HPD callback type %d, link_index=%u\n", notify->type, link_index); hpd_aconnector = aconnector; @@ -816,7 +816,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, if (hpd_aconnector) { if (notify->type == DMUB_NOTIFICATION_HPD) { if (hpd_aconnector->dc_link->hpd_status == (notify->hpd_status == DP_HPD_PLUG)) - DRM_WARN("DMUB reported hpd status unchanged. link_index=%u\n", link_index); + drm_warn(adev_to_drm(adev), "DMUB reported hpd status unchanged. 
link_index=%u\n", link_index); handle_hpd_irq_helper(hpd_aconnector); } else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) { handle_hpd_rx_irq(hpd_aconnector); @@ -939,7 +939,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) continue; } if (!dm->dmub_callback[notify.type]) { - DRM_WARN("DMUB notification skipped due to no handler: type=%s\n", + drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n", event_type[notify.type]); continue; } @@ -1207,7 +1207,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) /* Reset DMCUB if it was previously running - before we overwrite its memory. */ status = dmub_srv_hw_reset(dmub_srv); if (status != DMUB_STATUS_OK) - DRM_WARN("Error resetting DMUB HW: %d\n", status); + drm_warn(adev_to_drm(adev), "Error resetting DMUB HW: %d\n", status); hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; @@ -1304,7 +1304,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); /* Init DMCU and ABM if available. */ if (dmcu && abm) { @@ -1362,13 +1362,13 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev) status = dmub_srv_is_hw_init(dmub_srv, &init); if (status != DMUB_STATUS_OK) - DRM_WARN("DMUB hardware init check failed: %d\n", status); + drm_warn(adev_to_drm(adev), "DMUB hardware init check failed: %d\n", status); if (status == DMUB_STATUS_OK && init) { /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); } else { /* Perform the full hardware initialization. 
*/ r = dm_dmub_hw_init(adev); @@ -3089,7 +3089,7 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; if (rc) - DRM_WARN("Failed to %s pflip interrupts\n", + drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); if (enable) { @@ -3099,14 +3099,14 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); if (rc) - DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; /* During gpu-reset we disable and then enable vblank irq, so * don't use amdgpu_irq_get/put() to avoid refcount change. */ if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); } } @@ -12353,7 +12353,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate; vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate; } else { - DRM_WARN("Unknown EDID CEA parser results\n"); + drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n"); return false; } @@ -12765,7 +12765,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * lead to this error. We can ignore this for now. 
*/ if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { - DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", + drm_warn(adev_to_drm(adev), "DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); } @@ -12779,7 +12779,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", + drm_warn(adev_to_drm(adev), "invalid read length %d from DPIA AUX 0x%x(%d)!\n", p_notify->aux_reply.length, payload->address, payload->length); *operation_result = AUX_RET_ERROR_INVALID_REPLY; -- GitLab From 4b884e3f03d64934e61eea87b6c1db613a8ff190 Mon Sep 17 00:00:00 2001 From: ChunTao Tso Date: Tue, 22 Oct 2024 14:54:50 +0800 Subject: [PATCH 082/899] drm/amd/display: Add a Panel Replay config option [Why] Replay needs a special policy for the Teams scenario, so add a flag indicating whether to apply that special policy. [How] Add a config option intended for future use for video conferencing applications.
Reviewed-by: Aric Cyr Signed-off-by: ChunTao Tso Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 2 ++ .../drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 1f8382db8599a..9bfa9ac1b05f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1132,6 +1132,8 @@ struct replay_config { bool low_rr_activated; /* Replay low refresh rate supported*/ bool low_rr_supported; + /* Replay Video Conferencing Optimization Enabled */ + bool replay_video_conferencing_optimization_enabled; }; /* Replay feature flags*/ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 669ac1bc662cd..da74c2b5854f3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1022,6 +1022,9 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream &alpm_config.raw, sizeof(alpm_config.raw)); } + + link->replay_settings.config.replay_video_conferencing_optimization_enabled = false; + return true; } -- GitLab From a3b7dc4a1ec4e184f48c2f8205d2ec88d31310fe Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 19 Mar 2025 12:45:13 -0400 Subject: [PATCH 083/899] drm/amd/display: Add Support for reg inbox0 for host->DMUB CMDs [WHY] DCN4+ supports a new register based mailbox for sending messages from host to DMCUB. This mailbox supports 64 byte commands, which makes it compatible with the same structure as the frame buffer based mailbox. [HOW] The intention is for reg_inbox0 to be a slot-in replacement for the frame buffer based mailbox (Inbox1).
It supports all of the required features: - Supports all messages handled by FB Inbox1 - Supports multi command batching Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dillon Varone Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 185 ++++++----- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 +- drivers/gpu/drm/amd/display/dc/dc_helper.c | 2 +- .../gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 3 +- .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 19 +- .../display/dc/link/protocols/link_dp_dpia.c | 1 + drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 132 +++++--- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 + .../drm/amd/display/dmub/src/dmub_dcn401.c | 121 ++++--- .../drm/amd/display/dmub/src/dmub_dcn401.h | 4 +- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 297 ++++++++++++------ 11 files changed, 500 insertions(+), 270 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 614e03bfd5984..ca6da53f45adb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -70,20 +70,28 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } } -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub = dc_dmub_srv->dmub; - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dmub_srv *dmub; + struct dc_context *dc_ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_pending(dmub, 100000); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); if (status != DMUB_STATUS_OK) { DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); 
dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); } + + return status == DMUB_STATUS_OK; } void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) @@ -126,7 +134,49 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } -bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, +static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status = DMUB_STATUS_OK; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + /* confirm no messages pending */ + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); + + /* queue command */ + if (status == DMUB_STATUS_OK) + status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); + + /* check for errors */ + if (status != DMUB_STATUS_OK) { + break; + } + } + + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } + return false; + } + + return true; +} + +static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { @@ -143,11 +193,16 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, for (i = 0 ; i < count; i++) { // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + if (!cmd_list[i].cmd_common.header.multi_cmd_pending || + dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); + } else { + status = DMUB_STATUS_QUEUE_FULL; + } if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. 
*/ - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status == DMUB_STATUS_POWER_STATE_D3) return false; @@ -156,7 +211,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); } if (status != DMUB_STATUS_OK) { @@ -168,7 +223,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } } - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { if (status != DMUB_STATUS_POWER_STATE_D3) { DC_ERROR("Error starting DMUB execution: status=%d\n", status); @@ -180,6 +235,23 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, return true; } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + bool res = false; + + if (dc_dmub_srv && dc_dmub_srv->dmub) { + if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { + res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } else { + res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } + } + + return res; +} + bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, enum dm_dmub_wait_type wait_type, union dmub_rb_cmd *cmd_list) @@ -202,7 +274,8 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); if (!dmub->debug.timeout_info.timeout_occured) { dmub->debug.timeout_info.timeout_occured = true; - dmub->debug.timeout_info.timeout_cmd = *cmd_list; + if (cmd_list) + dmub->debug.timeout_info.timeout_cmd = *cmd_list; dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); @@ -210,8 +283,9 @@ 
bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, } // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { + dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); + } } return true; @@ -224,74 +298,10 @@ bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) { - struct dc_context *dc_ctx; - struct dmub_srv *dmub; - enum dmub_status status; - int i; - - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; - - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - - for (i = 0 ; i < count; i++) { - // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - - if (status == DMUB_STATUS_QUEUE_FULL) { - /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); - if (status == DMUB_STATUS_POWER_STATE_D3) - return false; - - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) - return false; - - /* Requeue the command. 
*/ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - } - - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - } - - status = dmub_srv_cmd_execute(dmub); - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } + if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) return false; - } - - // Wait for DMUB to process command - if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { - do { - status = dmub_srv_wait_for_idle(dmub, 100000); - } while (status != DMUB_STATUS_OK); - } else - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) { - DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - return false; - } - - // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); - } - - return true; + return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); } bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) @@ -1243,7 +1253,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); memset(&new_signals, 0, sizeof(new_signals)); @@ -1400,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); + dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); } } @@ -1654,7 
+1664,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + global_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); if (enable) { /* send global configuration parameters */ @@ -1673,11 +1684,13 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* configure command header */ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_base_cmd->header.multi_cmd_pending = 1; stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_sub_state_cmd->header.multi_cmd_pending = 1; /* copy stream static base state */ memcpy(&stream_base_cmd->config, @@ -1723,7 +1736,8 @@ void dc_dmub_srv_fams2_drr_update(struct dc *dc, cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; - cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); + cmd.fams2_drr_update.header.payload_bytes = + sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); dm_execute_dmub_cmd(dc->ctx, &cmd, 
DM_DMUB_WAIT_TYPE_WAIT); } @@ -1759,7 +1773,8 @@ void dc_dmub_srv_fams2_passthrough_flip( /* build command header */ cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; - cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); + cmds[num_cmds].fams2_flip.header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); /* for chaining multiple commands, all but last command should set to 1 */ cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a636f4c3f01db..ada5c2fb2db30 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -58,7 +58,7 @@ struct dc_dmub_srv { bool needs_idle_wake; }; -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 8f077e15b4f06..72b87b78da0e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -682,7 +682,7 @@ void reg_sequence_wait_done(const struct dc_context *ctx) if (offload && ctx->dc->debug.dmub_offload_enabled && !ctx->dc->debug.dmcub_emulation) { - dc_dmub_srv_wait_idle(ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); } } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 0d7e7f3b81a1f..a641ae04450c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -240,7 +240,8 @@ bool dmub_abm_save_restore( 
cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; - cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); + cmd.abm_save_restore.header.payload_bytes = + sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index c31e4f26a305b..fcd3d86ad5173 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -280,7 +280,9 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm memset(&cmd, 0, sizeof(cmd)); pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; - pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + pCmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - + sizeof(struct dmub_cmd_header); pCmd->replay_set_power_opt_data.power_opt = power_opt; pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); @@ -319,7 +321,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_timing_sync.header.sub_type = DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; cmd.replay_set_timing_sync.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = cmd_element->sync_data.panel_inst; @@ -331,7 +334,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_frameupdate_timer.header.sub_type = 
DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; cmd.replay_set_frameupdate_timer.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_frameupdate_timer.data.panel_inst = cmd_element->panel_inst; @@ -345,7 +349,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_pseudo_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; cmd.replay_set_pseudo_vtotal.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_pseudo_vtotal.data.panel_inst = cmd_element->pseudo_vtotal_data.panel_inst; @@ -357,7 +362,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; @@ -369,7 +375,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_general_cmd.header.sub_type = DMUB_CMD__REPLAY_SET_GENERAL_CMD; cmd.replay_set_general_cmd.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_general_cmd); + sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_general_cmd.data.panel_inst = cmd_element->set_general_cmd_data.panel_inst; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0d123e647652e..c149210096ac7 100644 --- 
a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -92,6 +92,7 @@ bool dpia_query_hpd_status(struct dc_link *link) /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 80595786341f9..22615b47d9bfc 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -51,8 +51,8 @@ * for the cache windows. * * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare - * for command submission. Commands can be queued via dmub_srv_cmd_queue() - * and executed via dmub_srv_cmd_execute(). + * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() + * and executed via dmub_srv_fb_cmd_execute(). * * If the queue is full the dmub_srv_wait_for_idle() call can be used to * wait until the queue has been cleared. 
@@ -170,6 +170,13 @@ enum dmub_srv_power_state_type { DMUB_POWER_STATE_D3 = 8 }; +/* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ +enum dmub_inbox_cmd_interface_type { + DMUB_CMD_INTERFACE_DEFAULT = 0, + DMUB_CMD_INTERFACE_FB = 1, + DMUB_CMD_INTERFACE_REG = 2, +}; + /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -349,6 +356,21 @@ struct dmub_diagnostic_data { uint8_t is_cw6_enabled : 1; }; +struct dmub_srv_inbox { + /* generic status */ + uint64_t num_submitted; + uint64_t num_reported; + union { + /* frame buffer mailbox status */ + struct dmub_rb rb; + /* register mailbox status */ + struct { + bool is_pending; + bool is_multi_pending; + }; + }; +}; + /** * struct dmub_srv_base_funcs - Driver specific base callbacks */ @@ -462,18 +484,21 @@ struct dmub_srv_hw_funcs { void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); + void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); + uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); }; @@ -493,6 
+518,7 @@ struct dmub_srv_create_params { enum dmub_asic asic; uint32_t fw_version; bool is_virtual; + enum dmub_inbox_cmd_interface_type inbox_type; }; /** @@ -521,8 +547,9 @@ struct dmub_srv { const struct dmub_srv_dcn401_regs *regs_dcn401; struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; - struct dmub_rb inbox1_rb; + struct dmub_srv_inbox inbox1; uint32_t inbox1_last_wptr; + struct dmub_srv_inbox reg_inbox0; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -542,6 +569,7 @@ struct dmub_srv { struct dmub_fw_meta_info meta_info; struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + enum dmub_inbox_cmd_interface_type inbox_type; enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; @@ -695,19 +723,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); /** - * dmub_srv_sync_inbox1() - sync sw state with hw state - * @dmub: the dmub service - * - * Sync sw state with hw state when resume from S0i3 - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); - -/** - * dmub_srv_cmd_queue() - queues a command to the DMUB + * dmub_srv_fb_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service * @cmd: the command to queue * @@ -719,11 +735,11 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); * DMUB_STATUS_QUEUE_FULL - no remaining room in queue * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd); /** - * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub + * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub * @dmub: the dmub service * * Begins execution of queued 
commands on the dmub. @@ -732,7 +748,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); /** * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed @@ -790,6 +806,23 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, uint32_t timeout_us); +/** + * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Waits until the commands queued prior to this call are complete. + * If interfaces remain busy due to additional work being submitted + * concurrently, this function will not continue to wait. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us); + /** * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle * @dmub: the dmub service @@ -888,9 +921,6 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, union dmub_fw_boot_options *option); -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd); - enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool skip); @@ -954,26 +984,6 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); */ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); -/** - * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command - * being processed by DMUB. 
- * @dmub: The dmub service - * @cmd: The dmub command being sent. If with_replay is true, the function will - * update cmd with replied data. - * @with_reply: true if DMUB reply needs to be copied back to cmd. false if the - * cmd doesn't need to be replied. - * @timeout_us: timeout in microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout - * interval. - */ -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us); - /** * dmub_srv_set_power_state() - Track DC power state in dmub_srv * @dmub: The dmub service @@ -986,4 +996,40 @@ enum dmub_status dmub_srv_send_reg_inbox0_cmd( */ void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); +/** + * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub + * @dmub: the dmub service + * @cmd: the command packet to be executed + * + * Executes a single command for the dmub. 
+ * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); + + +/** + * dmub_srv_cmd_get_response() - Copies return data for command into buffer + * @dmub: the dmub service + * @cmd_rsp: response buffer + * + * Copies return data for command into buffer + */ +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp); + +/** + * dmub_srv_sync_inboxes() - Sync inbox state + * @dmub: the dmub service + * + * Sync inbox state + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); + #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5e7c698f9bfb6..4a4e980688933 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -5993,6 +5993,9 @@ static inline uint32_t dmub_rb_num_free(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* +1 because 1 entry is always unusable */ + data_count += DMUB_RB_CMD_SIZE; + return (rb->capacity - data_count) / DMUB_RB_CMD_SIZE; } @@ -6012,6 +6015,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* -1 because 1 entry is always unusable */ return (data_count == (rb->capacity - DMUB_RB_CMD_SIZE)); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index e67f7c4784eb7..731ca9b6a6cfc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -517,28 +517,69 @@ void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { uint32_t *dwords = (uint32_t *)cmd; - + int32_t payload_size_bytes = 
cmd->cmd_common.header.payload_bytes; + uint32_t msg_index; static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); + /* read remaining data based on payload size */ + for (msg_index = 0; msg_index < 15; msg_index++) { + if (payload_size_bytes <= msg_index * 4) { + break; + } + + switch (msg_index) { + case 0: + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); + break; + case 1: + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); + break; + case 2: + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); + break; + case 3: + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); + break; + case 4: + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); + break; + case 5: + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); + break; + case 6: + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); + break; + case 7: + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); + break; + case 8: + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); + break; + case 9: + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); + break; + case 10: + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); + break; + case 11: + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); + break; + case 12: + 
REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); + break; + case 13: + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); + break; + case 14: + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); + break; + } + } + /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY * interrupt. */ - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); } uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) @@ -556,30 +597,39 @@ void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); - dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[12] = 
REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); } void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); +} + +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) +{ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); } +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); +} + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); @@ -604,11 +654,6 @@ uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) return status; } -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) -{ - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); -} - void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 
1:0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index c35be52676f6f..88c3a44d67d9a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -277,11 +277,13 @@ uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index ae8133816b43c..07bb1d4c4287d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -157,6 +157,9 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; + /* default to specifying now inbox type */ + enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; + switch (asic) { case DMUB_ASIC_DCN20: case DMUB_ASIC_DCN21: @@ -395,10 +398,15 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn401_get_current_time; 
funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; + funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; + funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; + funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; + default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now + funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; @@ -411,6 +419,20 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) return false; } + /* set default inbox type if not overriden */ + if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { + if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { + /* use default inbox type as specified by DCN rev */ + dmub->inbox_type = default_inbox_type; + } else if (funcs->send_reg_inbox0_cmd_msg) { + /* prefer reg as default inbox type if present */ + dmub->inbox_type = DMUB_CMD_INTERFACE_REG; + } else { + /* use fb as fallback */ + dmub->inbox_type = DMUB_CMD_INTERFACE_FB; + } + } + return true; } @@ -426,6 +448,7 @@ enum dmub_status dmub_srv_create(struct dmub_srv *dmub, dmub->asic = params->asic; dmub->fw_version = params->fw_version; dmub->is_virtual = params->is_virtual; + dmub->inbox_type = params->inbox_type; /* Setup asic dependent hardware funcs. 
*/ if (!dmub_srv_hw_setup(dmub, params->asic)) { @@ -695,7 +718,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, inbox1.base = cw4.region.base; inbox1.top = cw4.region.base + DMUB_RB_SIZE; outbox1.base = inbox1.top; - outbox1.top = cw4.region.top; + outbox1.top = inbox1.top + DMUB_RB_SIZE; cw5.offset.quad_part = tracebuff_fb->gpu_addr; cw5.region.base = DMUB_CW5_BASE; @@ -737,7 +760,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, rb_params.ctx = dmub; rb_params.base_address = mail_fb->cpu_addr; rb_params.capacity = DMUB_RB_SIZE; - dmub_rb_init(&dmub->inbox1_rb, &rb_params); + dmub_rb_init(&dmub->inbox1.rb, &rb_params); // Initialize outbox1 ring buffer rb_params.ctx = dmub; @@ -768,27 +791,6 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) -{ - if (!dmub->sw_init) - return DMUB_STATUS_INVALID; - - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); - - if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { - return DMUB_STATUS_HW_FAILURE; - } else { - dmub->inbox1_rb.rptr = rptr; - dmub->inbox1_rb.wrpt = wptr; - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - } - } - - return DMUB_STATUS_OK; -} - enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) { if (!dmub->sw_init) @@ -799,8 +801,13 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) /* mailboxes have been reset in hw, so reset the sw state as well */ dmub->inbox1_last_wptr = 0; - dmub->inbox1_rb.wrpt = 0; - dmub->inbox1_rb.rptr = 0; + dmub->inbox1.rb.wrpt = 0; + dmub->inbox1.rb.rptr = 0; + dmub->inbox1.num_reported = 0; + dmub->inbox1.num_submitted = 0; + dmub->reg_inbox0.num_reported = 0; + dmub->reg_inbox0.num_submitted = 0; + dmub->reg_inbox0.is_pending = 0; dmub->outbox0_rb.wrpt = 0; dmub->outbox0_rb.rptr = 0; 
dmub->outbox1_rb.wrpt = 0; @@ -811,7 +818,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) @@ -820,18 +827,20 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (dmub->power_state != DMUB_POWER_STATE_D0) return DMUB_STATUS_POWER_STATE_D3; - if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || - dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || + dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { return DMUB_STATUS_HW_FAILURE; } - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) + if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { + dmub->inbox1.num_submitted++; return DMUB_STATUS_OK; + } return DMUB_STATUS_QUEUE_FULL; } -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) { struct dmub_rb flush_rb; @@ -846,13 +855,13 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. 
*/ - flush_rb = dmub->inbox1_rb; + flush_rb = dmub->inbox1.rb; flush_rb.rptr = dmub->inbox1_last_wptr; dmub_rb_flush_pending(&flush_rb); - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; return DMUB_STATUS_OK; } @@ -910,26 +919,97 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } +static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) +{ + if (dmub->reg_inbox0.is_pending) { + dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (!dmub->reg_inbox0.is_pending) { + /* ack the rsp interrupt */ + if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + + /* only update the reported count if commands aren't being batched */ + if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { + dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; + } + } + } +} + +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us) +{ + uint32_t i; + const uint32_t polling_interval_us = 1; + struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; + struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; + const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; + const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; + + if (!dmub->hw_init || + !dmub->hw_funcs.get_inbox1_wptr) + return DMUB_STATUS_INVALID; + + /* take a snapshot of the required mailbox state */ + scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); + + for (i = 0; i <= timeout_us; i += polling_interval_us) { + scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && + 
dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + /* check current HW state first, but use command submission vs reported as a fallback */ + if ((dmub_rb_empty(&scratch_inbox1.rb) || + inbox1->num_reported >= scratch_inbox1.num_submitted) && + (!scratch_reg_inbox0.is_pending || + reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; +} + enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { uint32_t i, rptr; + const uint32_t polling_interval_us = 1; if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i <= timeout_us; ++i) { + for (i = 0; i < timeout_us; i += polling_interval_us) { + /* update inbox1 state */ rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (rptr > dmub->inbox1_rb.capacity) + if (rptr > dmub->inbox1.rb.capacity) return DMUB_STATUS_HW_FAILURE; - dmub->inbox1_rb.rptr = rptr; + if (dmub->inbox1.rb.rptr > rptr) { + /* rb wrapped */ + dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } else { + dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } + dmub->inbox1.rb.rptr = rptr; + + /* update reg_inbox0 */ + dmub_srv_update_reg_inbox0_status(dmub); - if (dmub_rb_empty(&dmub->inbox1_rb)) + /* check for idle */ + if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) return DMUB_STATUS_OK; - udelay(1); + udelay(polling_interval_us); } return DMUB_STATUS_TIMEOUT; @@ -1040,35 +1120,6 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd) -{ - enum dmub_status status = DMUB_STATUS_OK; - - // Queue command - status = 
dmub_srv_cmd_queue(dmub, cmd); - - if (status != DMUB_STATUS_OK) - return status; - - // Execute command - status = dmub_srv_cmd_execute(dmub); - - if (status != DMUB_STATUS_OK) - return status; - - // Wait for DMUB to process command - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) - return status; - - // Copy data back from ring buffer into command - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); - - return status; -} - static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, void *entry) { @@ -1160,47 +1211,105 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ } } +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +{ + if (!dmub || !dmub->hw_init) + return; + + dmub->power_state = dmub_srv_power_state; +} -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us) +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { - uint32_t rsp_ready = 0; - uint32_t i; + uint32_t num_pending = 0; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + + if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || + !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) + return DMUB_STATUS_INVALID; + + if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) + num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; + else + /* num_submitted wrapped */ + num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - + (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); + if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) + return DMUB_STATUS_QUEUE_FULL; + + /* clear last rsp ack and send message */ + dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); - for (i = 0; i < timeout_us; i++) 
{ - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready) - break; - udelay(1); + dmub->reg_inbox0.num_submitted++; + dmub->reg_inbox0.is_pending = true; + dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; + + return DMUB_STATUS_OK; +} + +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp) +{ + if (dmub) { + if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && + dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); + } else { + dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); + } } - if (rsp_ready == 0) - return DMUB_STATUS_TIMEOUT; +} - if (with_reply) - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); +static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) +{ + if (!dmub || !dmub->sw_init) + return DMUB_STATUS_INVALID; - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + dmub->reg_inbox0.is_pending = 0; + dmub->reg_inbox0.is_multi_pending = 0; - /* wait for rsp int status is cleared to initial state before exit */ - for (; i <= timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready == 0) - break; - udelay(1); + return DMUB_STATUS_OK; +} + +static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1.rb.rptr = rptr; + dmub->inbox1.rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; + } } - ASSERT(rsp_ready == 0); return DMUB_STATUS_OK; } -void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +enum dmub_status 
dmub_srv_sync_inboxes(struct dmub_srv *dmub) { - if (!dmub || !dmub->hw_init) - return; + enum dmub_status status; - dmub->power_state = dmub_srv_power_state; + status = dmub_srv_sync_reg_inbox0(dmub); + if (status != DMUB_STATUS_OK) + return status; + + status = dmub_srv_sync_inbox1(dmub); + if (status != DMUB_STATUS_OK) + return status; + + return DMUB_STATUS_OK; } -- GitLab From 146a4429b5674b7520a96aea34233949731c6086 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 20 Mar 2025 13:58:24 -0400 Subject: [PATCH 084/899] drm/amd/display: Do Not Consider DSC if Valid Config Not Found [why] In the mode validation, mst dsc is considered for bw calculation after common dsc config is determined. Currently the common dsc config is considered found if max and min target bpp are non-zero, which is not accurate. Invalid max and min target bpp values would not get max_kbps and min_kbps calculated, leading to falsely passing a mode that does not have valid dsc parameters available. [how] Use the return value of decide_dsc_bandwidth_range() to determine whether valid dsc common config is found or not. Prune out modes that do not have valid common dsc config determined. 
Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23f..d8dcfb3efaaa6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1713,16 +1713,17 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, struct dc_dsc_bw_range *bw_range) { struct dc_dsc_policy dsc_policy = {0}; + bool is_dsc_possible; dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); - dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], - stream->sink->ctx->dc->debug.dsc_min_slice_height_override, - dsc_policy.min_target_bpp * 16, - dsc_policy.max_target_bpp * 16, - &stream->sink->dsc_caps.dsc_dec_caps, - &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); - - return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; + is_dsc_possible = dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], + stream->sink->ctx->dc->debug.dsc_min_slice_height_override, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, + &stream->sink->dsc_caps.dsc_dec_caps, + &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); + + return is_dsc_possible; } #endif -- GitLab From fe45e2af4a22e569b35b7f45eb9f040f6fbef94f Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 19 Mar 2025 13:53:25 -0400 Subject: [PATCH 085/899] drm/amd/display: Fix VUpdate offset calculations for dcn401 [WHY&HOW] DCN401 uses a different structure to store the VStartup offset used to calculate the VUpdate position, so adjust the calculations to use 
this value. Reviewed-by: Aric Cyr Signed-off-by: Dillon Varone Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 44 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 1 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f64..79f4eaf8fc520 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2646,3 +2646,47 @@ void dcn401_plane_atomic_power_down(struct dc *dc, if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); } + +/* + * apply_front_porch_workaround + * + * This is a workaround for a bug that has existed since R5xx and has not been + * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. + */ +static void apply_front_porch_workaround( + struct dc_crtc_timing *timing) +{ + if (timing->flags.INTERLACE == 1) { + if (timing->v_front_porch < 2) + timing->v_front_porch = 2; + } else { + if (timing->v_front_porch < 1) + timing->v_front_porch = 1; + } +} + +int dcn401_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) +{ + const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; + struct dc_crtc_timing patched_crtc_timing; + int vesa_sync_start; + int asic_blank_end; + int interlace_factor; + + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + + interlace_factor = patched_crtc_timing.flags.INTERLACE ? 
2 : 1; + + vesa_sync_start = patched_crtc_timing.v_addressable + + patched_crtc_timing.v_border_bottom + + patched_crtc_timing.v_front_porch; + + asic_blank_end = (patched_crtc_timing.v_total - + vesa_sync_start - + patched_crtc_timing.v_border_top) + * interlace_factor; + + return asic_blank_end - + pipe_ctx->global_sync.dcn4x.vstartup_lines + 1; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 781cf0efccc6c..37c915568afcb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,4 +109,5 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +int dcn401_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index fe7aceb2f5104..aa9573ce44fce 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -73,7 +73,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .init_sys_ctx = dcn20_init_sys_ctx, .init_vm_ctx = dcn20_init_vm_ctx, .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .get_vupdate_offset_from_vsync = dcn401_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, .apply_idle_power_optimizations = dcn401_apply_idle_power_optimizations, .does_plane_fit_in_mall = NULL, -- GitLab From 0fc9635a801f6ba3a03ad2de6d46f4f3e2fdfed6 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 28 Mar 2025 12:56:39 -0400 Subject: [PATCH 086/899] Revert "drm/amd/display: Fix VUpdate offset calculations for dcn401" This reverts commit fe45e2af4a22e569b35b7f45eb9f040f6fbef94f. 
Reason for revert: it causes stuttering in some usecases. Reviewed-by: Aric Cyr Signed-off-by: Dillon Varone Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 44 ------------------- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 1 - .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 3 files changed, 1 insertion(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 79f4eaf8fc520..5489f3d431f64 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2646,47 +2646,3 @@ void dcn401_plane_atomic_power_down(struct dc *dc, if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); } - -/* - * apply_front_porch_workaround - * - * This is a workaround for a bug that has existed since R5xx and has not been - * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. - */ -static void apply_front_porch_workaround( - struct dc_crtc_timing *timing) -{ - if (timing->flags.INTERLACE == 1) { - if (timing->v_front_porch < 2) - timing->v_front_porch = 2; - } else { - if (timing->v_front_porch < 1) - timing->v_front_porch = 1; - } -} - -int dcn401_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) -{ - const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; - struct dc_crtc_timing patched_crtc_timing; - int vesa_sync_start; - int asic_blank_end; - int interlace_factor; - - patched_crtc_timing = *dc_crtc_timing; - apply_front_porch_workaround(&patched_crtc_timing); - - interlace_factor = patched_crtc_timing.flags.INTERLACE ? 
2 : 1; - - vesa_sync_start = patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom + - patched_crtc_timing.v_front_porch; - - asic_blank_end = (patched_crtc_timing.v_total - - vesa_sync_start - - patched_crtc_timing.v_border_top) - * interlace_factor; - - return asic_blank_end - - pipe_ctx->global_sync.dcn4x.vstartup_lines + 1; -} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 37c915568afcb..781cf0efccc6c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,5 +109,4 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); -int dcn401_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index aa9573ce44fce..fe7aceb2f5104 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -73,7 +73,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .init_sys_ctx = dcn20_init_sys_ctx, .init_vm_ctx = dcn20_init_vm_ctx, .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn401_get_vupdate_offset_from_vsync, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, .apply_idle_power_optimizations = dcn401_apply_idle_power_optimizations, .does_plane_fit_in_mall = NULL, -- GitLab From e8cc149ed906a371a5962ff8065393bae28165c9 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 19 Mar 2025 13:54:44 -0400 Subject: [PATCH 087/899] drm/amd/display: Fix Vertical Interrupt definitions for dcn32, dcn401 [WHY&HOW] - VUPDATE_NO_LOCK should be used in place of VUPDATE always 
- Add VERTICAL_INTERRUPT1 and VERTICAL_INTERRUPT2 definitions Reviewed-by: Aric Cyr Signed-off-by: Dillon Varone Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/irq/dcn32/irq_service_dcn32.c | 61 ++++++++++++++----- .../dc/irq/dcn401/irq_service_dcn401.c | 60 +++++++++++++----- drivers/gpu/drm/amd/display/dc/irq_types.h | 9 +++ 3 files changed, 101 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c index f0ac0aeeac512..f839afacd5a5c 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c @@ -191,6 +191,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -259,6 +269,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. 
*/ @@ -270,14 +287,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ -} - #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -285,6 +294,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -387,21 +410,29 @@ irq_source_info_dcn32[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), 
vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), + vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3) }; static const struct irq_service_funcs irq_service_funcs_dcn32 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c index b43c9524b0de1..8499e505cf3ef 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c @@ -171,6 +171,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -239,6 +249,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. 
*/ @@ -250,13 +267,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -264,6 +274,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -364,21 +388,29 @@ irq_source_info_dcn401[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), 
vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), + vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3), }; static const struct irq_service_funcs irq_service_funcs_dcn401 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 110f656d43aee..eadab0a2afebe 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -161,6 +161,13 @@ enum dc_irq_source { DC_IRQ_SOURCE_DPCX_TX_PHYE, DC_IRQ_SOURCE_DPCX_TX_PHYF, + DC_IRQ_SOURCE_DC1_VLINE2, + DC_IRQ_SOURCE_DC2_VLINE2, + DC_IRQ_SOURCE_DC3_VLINE2, + DC_IRQ_SOURCE_DC4_VLINE2, + DC_IRQ_SOURCE_DC5_VLINE2, + DC_IRQ_SOURCE_DC6_VLINE2, + DAL_IRQ_SOURCES_NUMBER }; @@ -170,6 +177,8 @@ enum irq_type IRQ_TYPE_VUPDATE = DC_IRQ_SOURCE_VUPDATE1, IRQ_TYPE_VBLANK = DC_IRQ_SOURCE_VBLANK1, IRQ_TYPE_VLINE0 = DC_IRQ_SOURCE_DC1_VLINE0, + IRQ_TYPE_VLINE1 = DC_IRQ_SOURCE_DC1_VLINE1, + IRQ_TYPE_VLINE2 = DC_IRQ_SOURCE_DC1_VLINE2, IRQ_TYPE_DCUNDERFLOW = DC_IRQ_SOURCE_DC1UNDERFLOW, }; -- GitLab From b5af7525ae0cb9b5d0a5dcb7d2e141ebe1462bdd Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 24 Mar 2025 00:40:16 -0500 Subject: [PATCH 088/899] drm/amd/display: Promote DAL to 3.2.327 Summary: * Improve vrr for replay and psr * Rewrite drm debug message * Fix clock issues for dcn32 and dcn401 * Fix mst dsc mode validation issue Reviewed-by: Aurabindo Pillai Signed-off-by: Aric Cyr Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 
0e98af9fb9e19..a24d004f8b573 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.326" +#define DC_VER "3.2.327" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- GitLab From dcc8e148e013f0d6b0a715e0aa05f033ca7945e9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jul 2024 14:58:00 +0800 Subject: [PATCH 089/899] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset Implement the kernel graphics queue pipe reset, and the driver will fall back to pipe reset when the queue reset fails. However, the ME FW does not fully support pipe reset yet, so disable the KGQ pipe reset temporarily. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 72 ++++++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4eedd92f000be..06fe21e15ed61 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -25,6 +25,8 @@ #include "amdgpu_socbb.h" +#define RS64_FW_UC_START_ADDR_LO 0x3000 + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index cedfcd1184301..42a08751af5af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6608,6 +6608,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe 
reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. 
+ */ + return 0; +} + static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6617,8 +6680,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return -EINVAL; r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); - if (r) - return r; + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; + } r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { -- GitLab From d69248cf4c91949a7b83b3fd77c7c229336bb23c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 21 Feb 2025 20:14:31 +0800 Subject: [PATCH 090/899] drm/amdgpu/gfx11: Implement the GFX11 KCQ pipe reset Implement the GFX11 compute pipe reset. As the GFX11 CPFW still hasn't fully supported pipe reset yet, therefore disable the KCQ pipe reset temporarily. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 136 ++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 42a08751af5af..7a9ab4c4b2f0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6703,6 +6703,136 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: 
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); 
+ break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. 
+ */ + return 0; +} + static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6713,8 +6843,10 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v11_0_kcq_init_queue(ring, true); -- GitLab From 820116a39f96bdc7d426c33a804b52f53700a919 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Thu, 27 Mar 2025 12:04:35 +0800 Subject: [PATCH 091/899] drm/amd/pp: Fix potential NULL pointer dereference in atomctrl_initialize_mc_reg_table The function atomctrl_initialize_mc_reg_table() and atomctrl_initialize_mc_reg_table_v2_2() does not check the return value of smu_atom_get_data_table(). If smu_atom_get_data_table() fails to retrieve vram_info, it returns NULL which is later dereferenced. 
Fixes: b3892e2bb519 ("drm/amd/pp: Use atombios api directly in powerplay (v2)") Fixes: 5f92b48cf62c ("drm/amd/pm: add mc register table initialization") Signed-off-by: Charles Han Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 4bd92fd782be6..8d40ed0f0e838 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -143,6 +143,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -180,6 +184,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); -- GitLab From 340f1d9fcd6218e7c9a9506a37beaddb29f01b73 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 28 Mar 2025 01:10:15 -0400 Subject: [PATCH 092/899] drm/amdgpu: add missing SMU6 defines, shifts and masks They will be used later when switching away from sid.h/si_enums.h. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- .../drm/amd/include/asic_reg/smu/smu_6_0_d.h | 44 ++++ .../include/asic_reg/smu/smu_6_0_sh_mask.h | 188 +++++++++++++++++- 2 files changed, 229 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h index 6b10be61efc36..bdef1f743df78 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h @@ -41,7 +41,49 @@ #define ixLCAC_MC5_CNTL 0x012B #define ixLCAC_MC5_OVR_SEL 0x012C #define ixLCAC_MC5_OVR_VAL 0x012D + +#define mmCG_SPLL_FUNC_CNTL 0x0180 +#define mmCG_SPLL_FUNC_CNTL_2 0x0181 +#define mmCG_SPLL_FUNC_CNTL_3 0x0182 +#define mmCG_SPLL_FUNC_CNTL_4 0x0183 +#define mmCG_SPLL_STATUS 0x0185 +#define mmSPLL_CNTL_MODE 0x0186 +#define mmCG_SPLL_SPREAD_SPECTRUM 0x0188 +#define mmCG_SPLL_SPREAD_SPECTRUM_2 0x0189 +#define mmCG_SPLL_AUTOSCALE_CNTL 0x018B +#define mmMPLL_BYPASSCLK_SEL 0x0197 +#define mmCG_CLKPIN_CNTL 0x0198 +#define mmCG_CLKPIN_CNTL_2 0x0199 +#define mmTHM_CLK_CNTL 0x019B +#define mmMISC_CLK_CNTL 0x019C +#define mmCG_THERMAL_CTRL 0x01C0 +#define mmCG_THERMAL_STATUS 0x01C1 +#define mmCG_THERMAL_INT 0x01C2 +#define mmCG_MULT_THERMAL_CTRL 0x01C4 +#define mmCG_MULT_THERMAL_STATUS 0x01C5 +#define mmCG_FDO_CTRL0 0x01D5 +#define mmCG_FDO_CTRL1 0x01D6 +#define mmCG_FDO_CTRL2 0x01D7 +#define mmCG_TACH_CTRL 0x01DC +#define mmCG_TACH_STATUS 0x01DD +#define mmGENERAL_PWRMGT 0x1E0 +#define mmCG_TPC 0x1E1 +#define mmSCLK_PWRMGT_CNTL 0x1E2 +#define mmTARGET_AND_CURRENT_PROFILE_INDEX 0x01E6 +#define mmCG_FTV 0x01EF +#define mmCG_FFCT_0 0x01F0 +#define mmCG_BSP 0x01FF +#define mmCG_AT 0x0200 +#define mmCG_GIT 0x0201 +#define mmCG_SSP 0x0203 +#define mmCG_DISPLAY_GAP_CNTL 0x020A +#define mmCG_ULV_CONTROL 0x021E +#define mmCG_ULV_PARAMETER 0x021F +#define mmSMC_SCRATCH0 0x0221 +#define mmCG_CAC_CTRL 0x022E + #define ixSMC_PC_C 0x80000370 + #define 
ixTHM_TMON0_DEBUG 0x03F0 #define ixTHM_TMON0_INT_DATA 0x0380 #define ixTHM_TMON0_RDIL0_DATA 0x0300 @@ -110,6 +152,7 @@ #define ixTHM_TMON1_RDIR7_DATA 0x0337 #define ixTHM_TMON1_RDIR8_DATA 0x0338 #define ixTHM_TMON1_RDIR9_DATA 0x0339 + #define mmGPIOPAD_A 0x05E7 #define mmGPIOPAD_EN 0x05E8 #define mmGPIOPAD_EXTERN_TRIG_CNTL 0x05F1 @@ -127,6 +170,7 @@ #define mmGPIOPAD_STRENGTH 0x05E5 #define mmGPIOPAD_SW_INT_STAT 0x05E4 #define mmGPIOPAD_Y 0x05E9 + #define mmSMC_IND_ACCESS_CNTL 0x008A #define mmSMC_IND_DATA_0 0x0081 #define mmSMC_IND_DATA 0x0081 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h index 7d3925b7266e7..67d3c7e13a48d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h @@ -23,10 +23,142 @@ #ifndef SMU_6_0_SH_MASK_H #define SMU_6_0_SH_MASK_H -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03ffffffL -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 -#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003f0L +#define CG_AT__CG_R_MASK 0x0000FFFFL +#define CG_AT__CG_R__SHIFT 0x00000000 +#define CG_AT__CG_L_MASK 0xFFFF0000L +#define CG_AT__CG_L__SHIFT 0x00000010 + +#define CG_BSP__BSP_MASK 0x0000FFFFL +#define CG_BSP__BSP__SHIFT 0x00000000 +#define CG_BSP__BSU_MASK 0x000F0000L +#define CG_BSP__BSU__SHIFT 0x00000010 + +#define CG_CAC_CTRL__CAC_WINDOW_MASK 0x00FFFFFFL +#define CG_CAC_CTRL__CAC_WINDOW__SHIFT 0x00000000 + +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK 0x00000002L +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE__SHIFT 0x00000001 +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK 0x00000004L +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT 0x00000002 +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK 0x00000008L +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT 0x00000003 +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK 0x00000100L +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK__SHIFT 0x00000008 + 
+#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK 0x00000003L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT 0x00000000 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK 0x0000000CL +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT 0x00000002 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT_MASK 0x0003FFF0L +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT__SHIFT 0x00000004 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT_MASK 0x00700000 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT__SHIFT 0x00000014 +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK 0x03000000L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT 0x00000018 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK 0x0C000000L +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT 0x0000001A + +#define CG_FFCT_0__UTC_0_MASK 0x000003FFL +#define CG_FFCT_0__UTC_0__SHIFT 0x00000000 +#define CG_FFCT_0__DTC_0_MASK 0x000FFC00L +#define CG_FFCT_0__DTC_0__SHIFT 0x0000000A + +#define CG_GIT__CG_GICST_MASK 0x0000FFFFL +#define CG_GIT__CG_GICST__SHIFT 0x00000000 +#define CG_GIT__CG_GIPOT_MASK 0xFFFF0000L +#define CG_GIT__CG_GIPOT__SHIFT 0x00000010 + +#define CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK 0x00000001L +#define CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK 0x00000002L +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP__SHIFT 0x00000001 +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK 0x00000008L +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT 0x00000003 +#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003F0L #define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT 0x00000004 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK 0x007F00000 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT 0x00000014 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK 0x0000001FF +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK 0x00800000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT 0x00000017 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK 0x04000000 +#define 
CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE__SHIFT 0x0000001A +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03FFFFFFL +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK 0x10000000L +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN__SHIFT 0x0000001C +#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x00000002L +#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x00000001 +#define CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK 0x00000001L +#define CG_SPLL_SPREAD_SPECTRUM__SSEN__SHIFT 0x00000000 +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK 0x0000FFF0L +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT 0x00000004 +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK 0x03FFFFFFL +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT 0x00000000 +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK 0x00000200L +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR__SHIFT 0x00000009 + +#define CG_SSP__SST_MASK 0x0000FFFFL +#define CG_SSP__SST__SHIFT 0x00000000 +#define CG_SSP__SSTU_MASK 0x000F0000L +#define CG_SSP__SSTU__SHIFT 0x00000010 + +#define CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK 0x00000007L +#define CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT 0x00000000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM_MASK 0x003FC000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT 0x0000000E +#define CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK 0x0001FE00L +#define CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT 0x00000009 +#define CG_THERMAL_INT__DIG_THERM_INTH_MASK 0x0000FF00L +#define CG_THERMAL_INT__DIG_THERM_INTH__SHIFT 0x00000008 +#define CG_THERMAL_INT__DIG_THERM_INTL_MASK 0x00FF0000L +#define CG_THERMAL_INT__DIG_THERM_INTL__SHIFT 0x00000010 +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK 0x01000000L +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH__SHIFT 0x00000018 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK 0x02000000 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW__SHIFT 0x00000019 + +#define CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK 0x0FF00000L +#define CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT
0x00000014 +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK 0x000001FFL +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT 0x00000000 +#define CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK 0x0003fe00L +#define CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT 0x00000009 + +#define CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK 0x000000FFL +#define CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT 0x00000000 +#define CG_FDO_CTRL1__FMAX_DUTY100_MASK 0x000000FFL +#define CG_FDO_CTRL1__FMAX_DUTY100__SHIFT 0x00000000 +#define CG_FDO_CTRL2__TMIN_MASK 0x000000FFL +#define CG_FDO_CTRL2__TMIN__SHIFT 0x00000000 +#define CG_FDO_CTRL2__FDO_PWM_MODE_MASK 0x00003800L +#define CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT 0x0000000B +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK 0xFE000000L +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT 0x00000019 + +#define CG_TACH_CTRL__EDGE_PER_REV_MASK 0x00000007L +#define CG_TACH_CTRL__EDGE_PER_REV__SHIFT 0x00000000 +#define CG_TACH_CTRL__TARGET_PERIOD_MASK 0xFFFFFFF8L +#define CG_TACH_CTRL__TARGET_PERIOD__SHIFT 0x00000003 +#define CG_TACH_STATUS__TACH_PERIOD_MASK 0xFFFFFFFFL +#define CG_TACH_STATUS__TACH_PERIOD__SHIFT 0x00000000 + +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK 0x00000001L +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN__SHIFT 0x00000000 +#define GENERAL_PWRMGT__STATIC_PM_EN_MASK 0x00000002L +#define GENERAL_PWRMGT__STATIC_PM_EN__SHIFT 0x00000001 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK 0x00000004L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS__SHIFT 0x00000002 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE_MASK 0x00000008L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE__SHIFT 0x00000003 +#define GENERAL_PWRMGT__SW_SMIO_INDEX_MASK 0x00000040L +#define GENERAL_PWRMGT__SW_SMIO_INDEX__SHIFT 0x00000006 +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK 0x00000400L +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN__SHIFT 0x0000000A +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK 0x00800000L +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN__SHIFT 0x00000017 + #define 
GPIOPAD_A__GPIO_A_MASK 0x7fffffffL #define GPIOPAD_A__GPIO_A__SHIFT 0x00000000 #define GPIOPAD_EN__GPIO_EN_MASK 0x7fffffffL @@ -195,6 +327,7 @@ #define GPIOPAD_SW_INT_STAT__SW_INT_STAT__SHIFT 0x00000000 #define GPIOPAD_Y__GPIO_Y_MASK 0x7fffffffL #define GPIOPAD_Y__GPIO_Y__SHIFT 0x00000000 + #define LCAC_MC0_CNTL__MC0_ENABLE_MASK 0x00000001L #define LCAC_MC0_CNTL__MC0_ENABLE__SHIFT 0x00000000 #define LCAC_MC0_CNTL__MC0_THRESHOLD_MASK 0x0001fffeL @@ -243,6 +376,37 @@ #define LCAC_MC5_OVR_SEL__MC5_OVR_SEL__SHIFT 0x00000000 #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL_MASK 0xffffffffL #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL__SHIFT 0x00000000 + +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK 0x0000FF00L +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT 0x00000008 + +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK 0x00000001L +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF__SHIFT 0x00000000 +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1_MASK 0x00000002L +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1__SHIFT 0x00000001 +#define SCLK_PWRMGT_CNTL__FIR_RESET_MASK 0x00000010L +#define SCLK_PWRMGT_CNTL__FIR_RESET__SHIFT 0x00000004 +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK 0x00000020L +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL__SHIFT 0x00000005 +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK 0x00000040L +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE__SHIFT 0x00000006 +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN_MASK 0x00000080L +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN__SHIFT 0x00000007 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON_MASK 0x00000100L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON__SHIFT 0x00000008 +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF_MASK 0x00000200L +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF__SHIFT 0x00000009 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF_MASK 0x00000400L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF__SHIFT 0x0000000A +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1_MASK 0x00000800L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1__SHIFT 0x0000000B +#define 
SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2_MASK 0x00001000L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2__SHIFT 0x0000000C +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3_MASK 0x00002000L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3__SHIFT 0x0000000D +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN_MASK 0x00004000L +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN__SHIFT 0x0000000E + #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK 0x00000001L #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0__SHIFT 0x00000000 #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_1_MASK 0x00000100L @@ -285,6 +449,7 @@ #define SMC_RESP_1__SMC_RESP__SHIFT 0x00000000 #define SMC_RESP_2__SMC_RESP_MASK 0xffffffffL #define SMC_RESP_2__SMC_RESP__SHIFT 0x00000000 + #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT_MASK 0x000ff000L #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT__SHIFT 0x0000000c #define SPLL_CNTL_MODE__SPLL_ENSAT_MASK 0x00000010L @@ -293,6 +458,8 @@ #define SPLL_CNTL_MODE__SPLL_FASTEN__SHIFT 0x00000003 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x00000002L #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV__SHIFT 0x00000001 +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK 0x0C000000L +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL__SHIFT 0x0000001A #define SPLL_CNTL_MODE__SPLL_RESET_EN_MASK 0x10000000L #define SPLL_CNTL_MODE__SPLL_RESET_EN__SHIFT 0x0000001c #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x00000001L @@ -303,10 +470,25 @@ #define SPLL_CNTL_MODE__SPLL_TEST__SHIFT 0x00000002 #define SPLL_CNTL_MODE__SPLL_VCO_MODE_MASK 0x60000000L #define SPLL_CNTL_MODE__SPLL_VCO_MODE__SHIFT 0x0000001d + #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK 0x0f000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT 0x00000018 #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX_MASK 0xf0000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX__SHIFT 0x0000001c + +#define TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK 0x000000F0L +#define 
TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT 0x00000004 + +#define THM_CLK_CNTL__CMON_CLK_SEL_MASK 0x000000FFL +#define THM_CLK_CNTL__CMON_CLK_SEL__SHIFT 0x00000000 +#define THM_CLK_CNTL__TMON_CLK_SEL_MASK 0x0000FF00L +#define THM_CLK_CNTL__TMON_CLK_SEL__SHIFT 0x00000008 + +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK 0x000000FFL +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT 0x00000000 +#define MISC_CLK_CNTL__ZCLK_SEL_MASK 0x0000FF00L +#define MISC_CLK_CNTL__ZCLK_SEL__SHIFT 0x00000008 + #define THM_TMON0_DEBUG__DEBUG_RDI_MASK 0x0000001fL #define THM_TMON0_DEBUG__DEBUG_RDI__SHIFT 0x00000000 #define THM_TMON0_DEBUG__DEBUG_Z_MASK 0x0000ffe0L -- GitLab From 4aa8de3d03124db71f24a9bd01198407294456fa Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 13 Dec 2024 17:10:35 +0800 Subject: [PATCH 093/899] drm/amdgpu/gfx12: Implement the gfx12 kgq pipe reset Implement the GFX12 kgq pipe reset, and temporarily disable the GFX12 pipe reset until the CPFW fully support it. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 69 +++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index ddac26b012bca..0490e04ac8a57 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5161,6 +5161,69 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) amdgpu_gfx_off_ctrl(adev, true); } +static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + +static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + 
+ gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Sometimes the ME start pc counter can't cache correctly, so the + * PC check only as a reference and pipe reset result rely on the + * later ring test. 
+ */ + return 0; +} + static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5171,8 +5234,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { - dev_err(adev->dev, "reset via MES failed %d\n", r); - return r; + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v12_reset_gfx_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kgq_init_queue(ring, true); -- GitLab From 0c6e39ce6da20104900b11bad64464a12fb47320 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 27 Mar 2025 11:38:42 -0400 Subject: [PATCH 094/899] drm/amdgpu: Add indirect L1_TLB_CNTL reg programming for VFs VFs on some IP versions are unable to access this register directly. This register must be programmed before PSP ring is setup, so use PSP VF mailbox directly. PSP will broadcast the register value to all VF assigned instances. 
Signed-off-by: Victor Skvortsov Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 10 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 12 +++- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 9 +-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 63 ++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 20 +++++++ 5 files changed, 93 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8d5acc415d386..dcf5e8e0b9e3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -107,6 +107,7 @@ enum psp_reg_prog_id { PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_MMHUB_L1_TLB_CNTL = 25, PSP_REG_LAST }; @@ -142,6 +143,8 @@ struct psp_funcs { bool (*get_ras_capability)(struct psp_context *psp); bool (*is_aux_sos_load_required)(struct psp_context *psp); bool (*is_reload_needed)(struct psp_context *psp); + int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); }; struct ta_funcs { @@ -475,6 +478,10 @@ struct amdgpu_psp_funcs { #define psp_is_aux_sos_load_required(psp) \ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) +#define psp_reg_program_no_ring(psp, val, id) \ + ((psp)->funcs->reg_program_no_ring ? 
\ + (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -569,5 +576,8 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp); int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); +int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index df03dba67ab89..b6ec6b7969f0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -146,11 +146,13 @@ enum AMDGIM_FEATURE_FLAG { enum AMDGIM_REG_ACCESS_FLAG { /* Use PSP to program IH_RB_CNTL */ - AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), + AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), /* Use RLC to program MMHUB regs */ - AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), + AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ - AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + /* Use PSP to program L1_TLB_CNTL*/ + AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), }; struct amdgim_pf2vf_info_v1 { @@ -330,6 +332,10 @@ struct amdgpu_video_codec_info; (amdgpu_sriov_vf((adev)) && \ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) +#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN))) + #define amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index d6ac2652f0ac2..bea724981309c 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -109,10 +109,11 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_mmhub : 1; + uint32_t vf_reg_access_gc : 1; + uint32_t vf_reg_access_l1_tlb_cntl : 1; + uint32_t reserved : 28; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 84cde1239ee45..4a43c9ab95a2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -30,6 +30,7 @@ #include "soc15_common.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_psp.h" #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 @@ -192,10 +193,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) uint32_t tmp, inst_mask; int i; - /* Setup TLB control */ - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) { - tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -209,7 +208,26 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } } } @@ -454,6 +472,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) return 0; } +static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, inst_mask; + + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } + } +} + static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; @@ -467,15 +509,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) for (i = 0; i < 16; i++) WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, i 
* hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, - 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL); @@ -485,6 +518,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0); } } + + mmhub_v1_8_disable_l1_tlb(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index cc621064610f1..17f1ccd8bd534 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -858,6 +858,25 @@ static bool psp_v13_0_is_reload_needed(struct psp_context *psp) return false; } +static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id) +{ + struct amdgpu_device *adev = psp->adev; + int ret = -EOPNOTSUPP; + + /* PSP will broadcast the value to all instances */ + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -884,6 +903,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .get_ras_capability = psp_v13_0_get_ras_capability, .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, .is_reload_needed = psp_v13_0_is_reload_needed, + .reg_program_no_ring = psp_v13_0_reg_program_no_ring, }; void psp_v13_0_set_psp_funcs(struct psp_context 
*psp) -- GitLab From e66c07864e53253ba15c983f89ba68333b2bab22 Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Fri, 28 Mar 2025 13:43:53 -0400 Subject: [PATCH 095/899] drm/amdgpu: enable FW workaround for VCN 4_0_5 Enabling VCN FW workaround for drm key injection through shared memory for vcn 4_0_5 Signed-off-by: Boyuan Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2e..a8cfc63713ad6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -207,6 +207,10 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); -- GitLab From 4445c9dfa9547be58c80dccd3d08b1d31643a5c1 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 28 Mar 2025 01:10:16 -0400 Subject: [PATCH 096/899] drm/pm/legacy-dpm: move SI away from sid.h and si_enums.h Replace defines for the ones under smu_6_0_d.h and smu_6_0_sh_mask.h Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 338 +++++++++++---------- drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c | 36 +-- 2 files changed, 190 insertions(+), 184 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index c5db5cb8c44fa..4c0e976004ba4 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -2209,7 +2209,7 @@ static u32 si_calculate_cac_wintime(struct amdgpu_device *adev) if (xclk == 0) return 0; - 
cac_window = RREG32(CG_CAC_CTRL) & CAC_WINDOW_MASK; + cac_window = RREG32(mmCG_CAC_CTRL) & CG_CAC_CTRL__CAC_WINDOW_MASK; cac_window_size = ((cac_window & 0xFFFF0000) >> 16) * (cac_window & 0x0000FFFF); wintime = (cac_window_size * 100) / xclk; @@ -2505,19 +2505,19 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if (adev->pm.dpm.sq_ramping_threshold == 0) return -EINVAL; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (MAX_POWER_MASK >> MAX_POWER_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (SQ_POWER_THROTTLE__MAX_POWER_MASK >> SQ_POWER_THROTTLE__MAX_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (MIN_POWER_MASK >> MIN_POWER_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (SQ_POWER_THROTTLE__MIN_POWER_MASK >> SQ_POWER_THROTTLE__MIN_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (MAX_POWER_DELTA_MASK >> MAX_POWER_DELTA_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK >> SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (STI_SIZE_MASK >> STI_SIZE_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK >> SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK >> SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT)) enable_sq_ramping = false; for (i = 0; i < state->performance_level_count; i++) { @@ -2526,14 +2526,17 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if ((state->performance_levels[i].sclk >= adev->pm.dpm.sq_ramping_threshold) && enable_sq_ramping) { - sq_power_throttle |= MAX_POWER(SISLANDS_DPM2_SQ_RAMP_MAX_POWER); - sq_power_throttle |= MIN_POWER(SISLANDS_DPM2_SQ_RAMP_MIN_POWER); - sq_power_throttle2 
|= MAX_POWER_DELTA(SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA); - sq_power_throttle2 |= STI_SIZE(SISLANDS_DPM2_SQ_RAMP_STI_SIZE); - sq_power_throttle2 |= LTI_RATIO(SISLANDS_DPM2_SQ_RAMP_LTI_RATIO); + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER << SQ_POWER_THROTTLE__MAX_POWER__SHIFT; + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MIN_POWER << SQ_POWER_THROTTLE__MIN_POWER__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA << SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_STI_SIZE << SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_LTI_RATIO << SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT; } else { - sq_power_throttle |= MAX_POWER_MASK | MIN_POWER_MASK; - sq_power_throttle2 |= MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + sq_power_throttle |= SQ_POWER_THROTTLE__MAX_POWER_MASK | + SQ_POWER_THROTTLE__MIN_POWER_MASK; + sq_power_throttle2 |= SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; } smc_state->levels[i].SQPowerThrottle = cpu_to_be32(sq_power_throttle); @@ -2777,9 +2780,9 @@ static int si_initialize_smc_cac_tables(struct amdgpu_device *adev) if (!cac_tables) return -ENOMEM; - reg = RREG32(CG_CAC_CTRL) & ~CAC_WINDOW_MASK; - reg |= CAC_WINDOW(si_pi->powertune_data->cac_window); - WREG32(CG_CAC_CTRL, reg); + reg = RREG32(mmCG_CAC_CTRL) & ~CG_CAC_CTRL__CAC_WINDOW_MASK; + reg |= (si_pi->powertune_data->cac_window << CG_CAC_CTRL__CAC_WINDOW__SHIFT); + WREG32(mmCG_CAC_CTRL, reg); si_pi->dyn_powertune_data.cac_leakage = adev->pm.dpm.cac_leakage; si_pi->dyn_powertune_data.dc_pwr_value = @@ -2978,10 +2981,10 @@ static int si_init_smc_spll_table(struct amdgpu_device *adev) ret = si_calculate_sclk_params(adev, sclk, &sclk_params); if (ret) break; - p_div = (sclk_params.vCG_SPLL_FUNC_CNTL & SPLL_PDIV_A_MASK) >> SPLL_PDIV_A_SHIFT; - fb_div = 
(sclk_params.vCG_SPLL_FUNC_CNTL_3 & SPLL_FB_DIV_MASK) >> SPLL_FB_DIV_SHIFT; - clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CLK_S_MASK) >> CLK_S_SHIFT; - clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CLK_V_MASK) >> CLK_V_SHIFT; + p_div = (sclk_params.vCG_SPLL_FUNC_CNTL & CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK) >> CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; + fb_div = (sclk_params.vCG_SPLL_FUNC_CNTL_3 & CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK) >> CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK) >> CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK) >> CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; fb_div &= ~0x00001FFF; fb_div >>= 1; @@ -3685,10 +3688,10 @@ static bool si_is_special_1gb_platform(struct amdgpu_device *adev) WREG32(MC_SEQ_IO_DEBUG_INDEX, 0xb); width = ((RREG32(MC_SEQ_IO_DEBUG_DATA) >> 1) & 1) ? 16 : 32; - tmp = RREG32(MC_ARB_RAMCFG); - row = ((tmp & NOOFROWS_MASK) >> NOOFROWS_SHIFT) + 10; - column = ((tmp & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT) + 8; - bank = ((tmp & NOOFBANK_MASK) >> NOOFBANK_SHIFT) + 2; + tmp = RREG32(mmMC_ARB_RAMCFG); + row = ((tmp & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT) + 10; + column = ((tmp & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT) + 8; + bank = ((tmp & MC_ARB_RAMCFG__NOOFBANK_MASK) >> MC_ARB_RAMCFG__NOOFBANK__SHIFT) + 2; density = (1 << (row + column - 20 + bank)) * width; @@ -3772,11 +3775,11 @@ static void si_set_dpm_event_sources(struct amdgpu_device *adev, u32 sources) } if (want_thermal_protection) { - WREG32_P(CG_THERMAL_CTRL, DPM_EVENT_SRC(dpm_event_src), ~DPM_EVENT_SRC_MASK); + WREG32_P(mmCG_THERMAL_CTRL, dpm_event_src << CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT, ~CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK); if (pi->thermal_protection) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, 
~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } else { - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } } @@ -3801,20 +3804,20 @@ static void si_enable_auto_throttle_source(struct amdgpu_device *adev, static void si_start_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, GLOBAL_PWRMGT_EN, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_stop_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_enable_sclk_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); } @@ -3854,7 +3857,7 @@ static int si_notify_hw_of_powersource(struct amdgpu_device *adev, bool ac_power static PPSMC_Result si_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, PPSMC_Msg msg, u32 parameter) { - WREG32(SMC_SCRATCH0, parameter); + WREG32(mmSMC_SCRATCH0, parameter); return amdgpu_si_send_msg_to_smc(adev, msg); } @@ -4039,12 +4042,12 @@ static void si_read_clock_registers(struct amdgpu_device *adev) { struct si_power_info *si_pi = si_get_pi(adev); - si_pi->clock_registers.cg_spll_func_cntl = RREG32(CG_SPLL_FUNC_CNTL); - si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(CG_SPLL_FUNC_CNTL_2); - si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(CG_SPLL_FUNC_CNTL_3); - si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(CG_SPLL_FUNC_CNTL_4); - si_pi->clock_registers.cg_spll_spread_spectrum = 
RREG32(CG_SPLL_SPREAD_SPECTRUM); - si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(CG_SPLL_SPREAD_SPECTRUM_2); + si_pi->clock_registers.cg_spll_func_cntl = RREG32(mmCG_SPLL_FUNC_CNTL); + si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(mmCG_SPLL_FUNC_CNTL_2); + si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(mmCG_SPLL_FUNC_CNTL_3); + si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(mmCG_SPLL_FUNC_CNTL_4); + si_pi->clock_registers.cg_spll_spread_spectrum = RREG32(mmCG_SPLL_SPREAD_SPECTRUM); + si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(mmCG_SPLL_SPREAD_SPECTRUM_2); si_pi->clock_registers.dll_cntl = RREG32(DLL_CNTL); si_pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL); si_pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL); @@ -4060,14 +4063,14 @@ static void si_enable_thermal_protection(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); else - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } static void si_enable_acpi_power_management(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, STATIC_PM_EN, ~STATIC_PM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__STATIC_PM_EN_MASK, ~GENERAL_PWRMGT__STATIC_PM_EN_MASK); } #if 0 @@ -4148,9 +4151,9 @@ static void si_program_ds_registers(struct amdgpu_device *adev) tmp = 0x1; if (eg_pi->sclk_deep_sleep) { - WREG32_P(MISC_CLK_CNTL, DEEP_SLEEP_CLK_SEL(tmp), ~DEEP_SLEEP_CLK_SEL_MASK); - WREG32_P(CG_SPLL_AUTOSCALE_CNTL, AUTOSCALE_ON_SS_CLEAR, - ~AUTOSCALE_ON_SS_CLEAR); + WREG32_P(mmMISC_CLK_CNTL, (tmp << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT), ~MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK); + WREG32_P(mmCG_SPLL_AUTOSCALE_CNTL, CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK, + 
~CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK); } } @@ -4159,18 +4162,18 @@ static void si_program_display_gap(struct amdgpu_device *adev) u32 tmp, pipe; int i; - tmp = RREG32(CG_DISPLAY_GAP_CNTL) & ~(DISP1_GAP_MASK | DISP2_GAP_MASK); + tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); if (adev->pm.dpm.new_active_crtc_count > 0) - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; else - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; if (adev->pm.dpm.new_active_crtc_count > 1) - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; else - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG); pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT; @@ -4205,10 +4208,10 @@ static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) if (enable) { if (pi->sclk_ss) - WREG32_P(GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } else { - WREG32_P(CG_SPLL_SPREAD_SPECTRUM, 0, ~SSEN); - WREG32_P(GENERAL_PWRMGT, 0, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmCG_SPLL_SPREAD_SPECTRUM, 0, ~CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } } @@ -4230,15 +4233,15 @@ static void si_setup_bsp(struct amdgpu_device *adev) &pi->pbsu); - pi->dsp = BSP(pi->bsp) | BSU(pi->bsu); - pi->psp = BSP(pi->pbsp) | BSU(pi->pbsu); + pi->dsp = (pi->bsp << CG_BSP__BSP__SHIFT) 
| (pi->bsu << CG_BSP__BSU__SHIFT); + pi->psp = (pi->pbsp << CG_BSP__BSP__SHIFT) | (pi->pbsu << CG_BSP__BSU__SHIFT); - WREG32(CG_BSP, pi->dsp); + WREG32(mmCG_BSP, pi->dsp); } static void si_program_git(struct amdgpu_device *adev) { - WREG32_P(CG_GIT, CG_GICST(R600_GICST_DFLT), ~CG_GICST_MASK); + WREG32_P(mmCG_GIT, R600_GICST_DFLT << CG_GIT__CG_GICST__SHIFT, ~CG_GIT__CG_GICST_MASK); } static void si_program_tp(struct amdgpu_device *adev) @@ -4247,54 +4250,54 @@ static void si_program_tp(struct amdgpu_device *adev) enum r600_td td = R600_TD_DFLT; for (i = 0; i < R600_PM_NUMBER_OF_TC; i++) - WREG32(CG_FFCT_0 + i, (UTC_0(r600_utc[i]) | DTC_0(r600_dtc[i]))); + WREG32(mmCG_FFCT_0 + i, (r600_utc[i] << CG_FFCT_0__UTC_0__SHIFT | r600_dtc[i] << CG_FFCT_0__DTC_0__SHIFT)); if (td == R600_TD_AUTO) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, FIR_FORCE_TREND_SEL, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); if (td == R600_TD_UP) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); if (td == R600_TD_DOWN) - WREG32_P(SCLK_PWRMGT_CNTL, FIR_TREND_MODE, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); } static void si_program_tpp(struct amdgpu_device *adev) { - WREG32(CG_TPC, R600_TPC_DFLT); + WREG32(mmCG_TPC, R600_TPC_DFLT); } static void si_program_sstp(struct amdgpu_device *adev) { - WREG32(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT))); + WREG32(mmCG_SSP, (R600_SSTU_DFLT << CG_SSP__SSTU__SHIFT| R600_SST_DFLT << CG_SSP__SST__SHIFT)); } static void si_enable_display_gap(struct amdgpu_device *adev) { - u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL); + u32 tmp = RREG32(mmCG_DISPLAY_GAP_CNTL); - tmp &= ~(DISP1_GAP_MASK | 
DISP2_GAP_MASK); - tmp |= (DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE) | - DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE)); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); + tmp |= (R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT); - tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK); - tmp |= (DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK) | - DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE)); - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK); + tmp |= (R600_PM_DISPLAY_GAP_VBLANK << CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); } static void si_program_vc(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); - WREG32(CG_FTV, pi->vrc); + WREG32(mmCG_FTV, pi->vrc); } static void si_clear_vc(struct amdgpu_device *adev) { - WREG32(CG_FTV, 0); + WREG32(mmCG_FTV, 0); } static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) @@ -4751,7 +4754,7 @@ static u32 si_calculate_memory_refresh_rate(struct amdgpu_device *adev, u32 dram_rows; u32 dram_refresh_rate; u32 mc_arb_rfsh_rate; - u32 tmp = (RREG32(MC_ARB_RAMCFG) & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + u32 tmp = (RREG32(mmMC_ARB_RAMCFG) & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT; if (tmp >= 4) dram_rows = 16384; @@ -4925,7 +4928,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd); - reg = CG_R(0xffff) | CG_L(0); + reg = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; table->initialState.level.aT = cpu_to_be32(reg); table->initialState.level.bSP = cpu_to_be32(pi->dsp); table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen; @@ -4951,10 +4954,13 @@ static int 
si_populate_smc_initial_state(struct amdgpu_device *adev, table->initialState.level.dpm2.BelowSafeInc = 0; table->initialState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | + SQ_POWER_THROTTLE__MAX_POWER_MASK; table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5073,8 +5079,8 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS); - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(4); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 4 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); @@ -5118,10 +5124,10 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, table->ACPIState.level.dpm2.BelowSafeInc = 0; table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | SQ_POWER_THROTTLE__MAX_POWER_MASK; table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5266,8 +5272,8 @@ static int si_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - WREG32(CG_ULV_CONTROL, ulv->cg_ulv_control); - WREG32(CG_ULV_PARAMETER, ulv->cg_ulv_parameter); + WREG32(mmCG_ULV_CONTROL, ulv->cg_ulv_control); + WREG32(mmCG_ULV_PARAMETER, 
ulv->cg_ulv_parameter); lane_width = amdgpu_get_pcie_lanes(adev); si_write_smc_soft_register(adev, SI_SMC_SOFT_REGISTER_non_ulv_pcie_link_width, lane_width); @@ -5310,16 +5316,16 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, do_div(tmp, reference_clock); fbdiv = (u32) tmp; - spll_func_cntl &= ~(SPLL_PDIV_A_MASK | SPLL_REF_DIV_MASK); - spll_func_cntl |= SPLL_REF_DIV(dividers.ref_div); - spll_func_cntl |= SPLL_PDIV_A(dividers.post_div); + spll_func_cntl &= ~(CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK | CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK); + spll_func_cntl |= dividers.ref_div << CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT; + spll_func_cntl |= dividers.post_div << CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(2); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 2 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; - spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK; - spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv); - spll_func_cntl_3 |= SPLL_DITHEN; + spll_func_cntl_3 &= ~CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK; + spll_func_cntl_3 |= fbdiv << CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + spll_func_cntl_3 |= CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK; if (pi->sclk_ss) { struct amdgpu_atom_ss ss; @@ -5330,12 +5336,12 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); - cg_spll_spread_spectrum &= ~CLK_S_MASK; - cg_spll_spread_spectrum |= CLK_S(clk_s); - cg_spll_spread_spectrum |= SSEN; + cg_spll_spread_spectrum &= ~CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK; + cg_spll_spread_spectrum |= clk_s << CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + cg_spll_spread_spectrum |= CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK; - cg_spll_spread_spectrum_2 &= ~CLK_V_MASK; - cg_spll_spread_spectrum_2 |= CLK_V(clk_v); + cg_spll_spread_spectrum_2 &= ~CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK; + 
cg_spll_spread_spectrum_2 |= clk_v << CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; } } @@ -5501,7 +5507,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, if (pi->mclk_stutter_mode_threshold && (pl->mclk <= pi->mclk_stutter_mode_threshold) && !eg_pi->uvd_enabled && - (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && + (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && (adev->pm.dpm.new_active_crtc_count <= 2)) { level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN; } @@ -5595,7 +5601,7 @@ static int si_populate_smc_t(struct amdgpu_device *adev, return -EINVAL; if (state->performance_level_count < 2) { - a_t = CG_R(0xffff) | CG_L(0); + a_t = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; smc_state->levels[0].aT = cpu_to_be32(a_t); return 0; } @@ -5616,13 +5622,13 @@ static int si_populate_smc_t(struct amdgpu_device *adev, t_l = (i + 1) * 1000 + 50 * R600_AH_DFLT; } - a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_R_MASK; - a_t |= CG_R(t_l * pi->bsp / 20000); + a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_AT__CG_R_MASK; + a_t |= (t_l * pi->bsp / 20000) << CG_AT__CG_R__SHIFT; smc_state->levels[i].aT = cpu_to_be32(a_t); high_bsp = (i == state->performance_level_count - 2) ? 
pi->pbsp : pi->bsp; - a_t = CG_R(0xffff) | CG_L(t_h * high_bsp / 20000); + a_t = (0xffff) << CG_AT__CG_R__SHIFT | (t_h * high_bsp / 20000) << CG_AT__CG_L__SHIFT; smc_state->levels[i + 1].aT = cpu_to_be32(a_t); } @@ -6196,9 +6202,9 @@ static int si_upload_mc_reg_table(struct amdgpu_device *adev, static void si_enable_voltage_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); else - WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); } static enum si_pcie_gen si_get_maximum_link_speed(struct amdgpu_device *adev, @@ -6220,8 +6226,8 @@ static u16 si_get_current_pcie_speed(struct amdgpu_device *adev) { u32 speed_cntl; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK; - speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL) & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK; + speed_cntl >>= PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; return (u16)speed_cntl; } @@ -6428,21 +6434,21 @@ static void si_dpm_setup_asic(struct amdgpu_device *adev) static int si_thermal_enable_alert(struct amdgpu_device *adev, bool enable) { - u32 thermal_int = RREG32(CG_THERMAL_INT); + u32 thermal_int = RREG32(mmCG_THERMAL_INT); if (enable) { PPSMC_Result result; - thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); - WREG32(CG_THERMAL_INT, thermal_int); + thermal_int &= ~(CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK); + WREG32(mmCG_THERMAL_INT, thermal_int); result = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt); if (result != PPSMC_Result_OK) { DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); return -EINVAL; } } else { - thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; - WREG32(CG_THERMAL_INT, 
thermal_int); + thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32(mmCG_THERMAL_INT, thermal_int); } return 0; @@ -6463,9 +6469,9 @@ static int si_thermal_set_temperature_range(struct amdgpu_device *adev, return -EINVAL; } - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTH(high_temp / 1000), ~DIG_THERM_INTH_MASK); - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTL(low_temp / 1000), ~DIG_THERM_INTL_MASK); - WREG32_P(CG_THERMAL_CTRL, DIG_THERM_DPM(high_temp / 1000), ~DIG_THERM_DPM_MASK); + WREG32_P(mmCG_THERMAL_INT, (high_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTH__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTH_MASK); + WREG32_P(mmCG_THERMAL_INT, (low_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTL__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTL_MASK); + WREG32_P(mmCG_THERMAL_CTRL, (high_temp / 1000) << CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT, ~CG_THERMAL_CTRL__DIG_THERM_DPM_MASK); adev->pm.dpm.thermal.min_temp = low_temp; adev->pm.dpm.thermal.max_temp = high_temp; @@ -6479,20 +6485,20 @@ static void si_fan_ctrl_set_static_mode(struct amdgpu_device *adev, u32 mode) u32 tmp; if (si_pi->fan_ctrl_is_in_default_mode) { - tmp = (RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; si_pi->fan_ctrl_default_mode = tmp; - tmp = (RREG32(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__TMIN_MASK) >> CG_FDO_CTRL2__TMIN__SHIFT; si_pi->t_min = tmp; si_pi->fan_ctrl_is_in_default_mode = false; } - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(0); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= 0 << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= mode << 
CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_setup_fan_table(struct amdgpu_device *adev) @@ -6511,7 +6517,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) return 0; } - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) { adev->pm.dpm.fan.ucode_fan_control = false; @@ -6547,7 +6553,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) reference_clock) / 1600); fan_table.fdo_max = cpu_to_be16((u16)duty100); - tmp = (RREG32(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT; + tmp = (RREG32(mmCG_MULT_THERMAL_CTRL) & CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK) >> CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT; fan_table.temp_src = (uint8_t)tmp; ret = amdgpu_si_copy_bytes_to_smc(adev, @@ -6606,8 +6612,8 @@ static int si_dpm_get_fan_speed_pwm(void *handle, if (adev->pm.no_fan) return -ENOENT; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; - duty = (RREG32(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; + duty = (RREG32(mmCG_THERMAL_STATUS) & CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK) >> CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6637,7 +6643,7 @@ static int si_dpm_set_fan_speed_pwm(void *handle, if (speed > 255) return -EINVAL; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6646,9 +6652,9 @@ static int si_dpm_set_fan_speed_pwm(void *handle, do_div(tmp64, 255); duty = (u32)tmp64; - tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK; - tmp |= FDO_STATIC_DUTY(duty); - WREG32(CG_FDO_CTRL0, tmp); 
+ tmp = RREG32(mmCG_FDO_CTRL0) & ~CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK; + tmp |= duty << CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT; + WREG32(mmCG_FDO_CTRL0, tmp); return 0; } @@ -6688,8 +6694,8 @@ static int si_dpm_get_fan_control_mode(void *handle, u32 *fan_mode) if (si_pi->fan_is_controlled_by_smc) return 0; - tmp = RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK; - *fan_mode = (tmp >> FDO_PWM_MODE_SHIFT); + tmp = RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + *fan_mode = (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT); return 0; } @@ -6707,7 +6713,7 @@ static int si_fan_ctrl_get_fan_speed_rpm(struct amdgpu_device *adev, if (adev->pm.fan_pulses_per_revolution == 0) return -ENOENT; - tach_period = (RREG32(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT; + tach_period = (RREG32(mmCG_TACH_STATUS) & CG_TACH_STATUS__TACH_PERIOD_MASK) >> CG_TACH_STATUS__TACH_PERIOD__SHIFT; if (tach_period == 0) return -ENOENT; @@ -6736,9 +6742,9 @@ static int si_fan_ctrl_set_fan_speed_rpm(struct amdgpu_device *adev, si_fan_ctrl_stop_smc_fan_control(adev); tach_period = 60 * xclk * 10000 / (8 * speed); - tmp = RREG32(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK; - tmp |= TARGET_PERIOD(tach_period); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__TARGET_PERIOD_MASK; + tmp |= tach_period << CG_TACH_CTRL__TARGET_PERIOD__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); si_fan_ctrl_set_static_mode(adev, FDO_PWM_MODE_STATIC_RPM); @@ -6752,13 +6758,13 @@ static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev) u32 tmp; if (!si_pi->fan_ctrl_is_in_default_mode) { - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(si_pi->fan_ctrl_default_mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= si_pi->fan_ctrl_default_mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(si_pi->t_min); - WREG32(CG_FDO_CTRL2, tmp); + tmp = 
RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= si_pi->t_min << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); si_pi->fan_ctrl_is_in_default_mode = true; } } @@ -6776,14 +6782,14 @@ static void si_thermal_initialize(struct amdgpu_device *adev) u32 tmp; if (adev->pm.fan_pulses_per_revolution) { - tmp = RREG32(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK; - tmp |= EDGE_PER_REV(adev->pm.fan_pulses_per_revolution -1); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__EDGE_PER_REV_MASK; + tmp |= (adev->pm.fan_pulses_per_revolution -1) << CG_TACH_CTRL__EDGE_PER_REV__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); } - tmp = RREG32(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK; - tmp |= TACH_PWM_RESP_RATE(0x28); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK; + tmp |= 0x28 << CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_start_thermal_controller(struct amdgpu_device *adev) @@ -7546,8 +7552,8 @@ static void si_dpm_debugfs_print_current_performance_level(void *handle, struct si_ps *ps = si_get_ps(rps); struct rv7xx_pl *pl; u32 current_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; if (current_index >= ps->performance_level_count) { seq_printf(m, "invalid dpm profile %d\n", current_index); @@ -7570,14 +7576,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_LOW_TO_HIGH: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + 
WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7587,14 +7593,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_HIGH_TO_LOW: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7899,8 +7905,8 @@ static int si_dpm_get_temp(void *handle) int actual_temp = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> - CTF_TEMP_SHIFT; + temp = (RREG32(mmCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; if (temp & 0x200) actual_temp = 255; @@ -8030,8 +8036,8 @@ static int si_dpm_read_sensor(void *handle, int idx, struct si_ps *ps = si_get_ps(rps); uint32_t sclk, mclk; u32 pl_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + 
TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; /* size must be at least 4 bytes for all sensors */ if (*size < 4) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index c712899c44cac..4e65ab9e931c9 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -44,8 +44,8 @@ static int si_set_smc_sram_address(struct amdgpu_device *adev, if ((smc_address + 3) > limit) return -EINVAL; - WREG32(SMC_IND_INDEX_0, smc_address); - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, smc_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); return 0; } @@ -74,7 +74,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; byte_count -= 4; @@ -89,7 +89,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - original_data = RREG32(SMC_IND_DATA_0); + original_data = RREG32(mmSMC_IND_DATA_0); extra_shift = 8 * (4 - byte_count); while (byte_count > 0) { @@ -105,7 +105,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); } done: @@ -127,10 +127,10 @@ void amdgpu_si_reset_smc(struct amdgpu_device *adev) { u32 tmp; - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL) | RST_REG; @@ -176,16 +176,16 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; - WREG32(SMC_MESSAGE_0, msg); + WREG32(mmSMC_MESSAGE_0, msg); for (i = 0; i < adev->usec_timeout; i++) { - tmp = 
RREG32(SMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(SMC_RESP_0); + return (PPSMC_Result)RREG32(mmSMC_RESP_0); } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) @@ -231,18 +231,18 @@ int amdgpu_si_load_smc_ucode(struct amdgpu_device *adev, u32 limit) return -EINVAL; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, ucode_start_address); - WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, ucode_start_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); while (ucode_size >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; ucode_size -= 4; } - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return 0; @@ -257,7 +257,7 @@ int amdgpu_si_read_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - *value = RREG32(SMC_IND_DATA_0); + *value = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; @@ -272,7 +272,7 @@ int amdgpu_si_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - WREG32(SMC_IND_DATA_0, value); + WREG32(mmSMC_IND_DATA_0, value); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; -- GitLab From d3cd9565c6269c39cf812eaed25071e537c1eb5d Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 28 Mar 2025 01:10:17 -0400 Subject: [PATCH 
097/899] drm/amdgpu: move si.c away from sid.h Replace defines by the ones added earlier to GFX6, SMU6 and DCE6 Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 365 ++++++++++++++++---------------- 1 file changed, 183 insertions(+), 182 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 94a6ec162640e..023fe880c9e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1085,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - r = RREG32(SMC_IND_DATA_0); + WREG32(mmSMC_IND_INDEX_0, (reg)); + r = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return r; } @@ -1096,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - WREG32(SMC_IND_DATA_0, (v)); + WREG32(mmSMC_IND_INDEX_0, (reg)); + WREG32(mmSMC_IND_DATA_0, (v)); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } @@ -1124,7 +1124,7 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS}, + {mmGRBM_STATUS}, {mmGRBM_STATUS2}, {mmGRBM_STATUS_SE0}, {mmGRBM_STATUS_SE1}, @@ -1136,8 +1136,8 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, {mmCP_STALLED_STAT3}, - {GB_ADDR_CONFIG}, - {MC_ARB_RAMCFG}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, {mmGB_TILE_MODE1}, {mmGB_TILE_MODE2}, @@ -1170,7 +1170,7 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGB_TILE_MODE29}, {mmGB_TILE_MODE30}, {mmGB_TILE_MODE31}, - {CC_RB_BACKEND_DISABLE, true}, + {mmCC_RB_BACKEND_DISABLE, true}, 
{mmGC_USER_RB_BACKEND_DISABLE, true}, {mmPA_SC_RASTER_CONFIG, true}, }; @@ -1282,7 +1282,7 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) if (adev->mode_info.num_crtc) { d1vga_control = RREG32(AVIVO_D1VGA_CONTROL); d2vga_control = RREG32(AVIVO_D2VGA_CONTROL); - vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_render_control = RREG32(mmVGA_RENDER_CONTROL); } rom_cntl = RREG32(R600_ROM_CNTL); @@ -1296,8 +1296,8 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) WREG32(AVIVO_D2VGA_CONTROL, (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(VGA_RENDER_CONTROL, - (vga_render_control & C_000300_VGA_VSTATUS_CNTL)); + WREG32(mmVGA_RENDER_CONTROL, + (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK)); } WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE); @@ -1308,7 +1308,7 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) if (adev->mode_info.num_crtc) { WREG32(AVIVO_D1VGA_CONTROL, d1vga_control); WREG32(AVIVO_D2VGA_CONTROL, d2vga_control); - WREG32(VGA_RENDER_CONTROL, vga_render_control); + WREG32(mmVGA_RENDER_CONTROL, vga_render_control); } WREG32(R600_ROM_CNTL, rom_cntl); return r; @@ -1345,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev) { u32 tmp, i; - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_BYPASS_EN; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp |= SPLL_CTLREQ_CHG; - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS) + if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK) break; udelay(1); } - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp &= ~(SPLL_CTLREQ_CHG | 
SCLK_MUX_UPDATE); - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK | + CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK); + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); tmp = RREG32(MPLL_CNTL_MODE); tmp &= ~MPLL_MCLK_SEL; @@ -1372,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev) { u32 tmp; - tmp = RREG32(SPLL_CNTL_MODE); - tmp |= SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_RESET; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_SLEEP; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(SPLL_CNTL_MODE); - tmp &= ~SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); } static int si_gpu_pci_config_reset(struct amdgpu_device *adev) @@ -1468,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; - temp = RREG32(CONFIG_CNTL); + temp = RREG32(mmCONFIG_CNTL); if (!state) { temp &= ~(1<<0); temp |= (1<<1); } else { temp &= ~(1<<1); } - WREG32(CONFIG_CNTL, temp); + WREG32(mmCONFIG_CNTL, temp); } static u32 si_get_xclk(struct amdgpu_device *adev) @@ -1483,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; - tmp = RREG32(CG_CLKPIN_CNTL_2); - if (tmp & MUX_TCLK_TO_XCLK) + tmp = RREG32(mmCG_CLKPIN_CNTL_2); + if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK) return TCLK; - tmp = RREG32(CG_CLKPIN_CNTL); - if (tmp & 
XTALIN_DIVIDE) + tmp = RREG32(mmCG_CLKPIN_CNTL); + if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK) return reference_clock / 4; return reference_clock; @@ -1533,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); - switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) { + switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) { case LC_LINK_WIDTH_X1: return 1; case LC_LINK_WIDTH_X2: @@ -1582,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) return; } - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - link_width_cntl &= ~LC_LINK_WIDTH_MASK; - link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT; - link_width_cntl |= (LC_RECONFIG_NOW | - LC_RECONFIG_ARC_MISSING_ESCAPE); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK; + link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT; + link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK | + PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK); - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, @@ -2032,7 +2033,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = static uint32_t si_get_rev_id(struct amdgpu_device *adev) { - return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) + return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } @@ -2253,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) return; - 
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> - LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >> + PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { DRM_INFO("PCIE gen 3 link speeds already enabled\n"); @@ -2282,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); - tmp = RREG32_PCIE(PCIE_LC_STATUS1); - max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; - current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); + max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT; + current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT; if (current_lw < max_lw) { - tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - if (tmp & LC_RENEGOTIATION_SUPPORT) { - tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); - tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); - tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) { + tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK); + tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT); + tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK; + 
WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp); } } @@ -2315,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL2, &gpu_cfg2); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_REDO_EQ; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); mdelay(100); @@ -2347,16 +2348,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) (PCI_EXP_LNKCTL2_ENTER_COMP | PCI_EXP_LNKCTL2_TX_MARGIN)); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp &= ~LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); } } } - speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; - speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK; + speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) @@ -2368,13 +2369,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, tmp16); - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; + 
WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); for (i = 0; i < adev->usec_timeout; i++) { - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0) break; udelay(1); } @@ -2432,121 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - data &= ~LC_XMIT_N_FTS_MASK; - data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; + data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); - data |= LC_GO_TO_RECOVERY; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data); - orig = data = RREG32_PCIE(PCIE_P_CNTL); - data |= P_IGNORE_EDB_ERR; + orig = data = RREG32_PCIE(ixPCIE_P_CNTL); + data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK; if (orig != data) - WREG32_PCIE(PCIE_P_CNTL, data); + WREG32_PCIE(ixPCIE_P_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); - data |= LC_PMI_TO_L1_DIS; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (!disable_l0s) - data |= LC_L0S_INACTIVITY(7); + data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT); if (!disable_l1) { - data |= LC_L1_INACTIVITY(7); - data &= 
~LC_PMI_TO_L1_DIS; + data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT); + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); if (!disable_plloff_in_l1) { bool clk_req_support; - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << 
PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2); + data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = 
si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3); + data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2); + data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3); + data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data); } - orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - data &= ~LC_DYN_LANES_PWR_STATE_MASK; - data |= LC_DYN_LANES_PWR_STATE(3); + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK; + data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT); if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data); - 
orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL); + data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL); + data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data); if (!disable_clkreq && !pci_is_root_bus(adev->pdev->bus)) { @@ -2562,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev) } if (clk_req_support) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); - data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2); + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data); - orig = data = RREG32(THM_CLK_CNTL); - data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); - data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + orig = data = RREG32(mmTHM_CLK_CNTL); + data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK); + data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT); if (orig != data) - WREG32(THM_CLK_CNTL, data); + WREG32(mmTHM_CLK_CNTL, data); - orig = data = RREG32(MISC_CLK_CNTL); - data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); - data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + orig = data = 
RREG32(mmMISC_CLK_CNTL); + data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK); + data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT); if (orig != data) - WREG32(MISC_CLK_CNTL, data); + WREG32(mmMISC_CLK_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL); - data &= ~BCLK_AS_XCLK; + orig = data = RREG32(mmCG_CLKPIN_CNTL); + data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL, data); + WREG32(mmCG_CLKPIN_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL_2); - data &= ~FORCE_BIF_REFCLK_EN; + orig = data = RREG32(mmCG_CLKPIN_CNTL_2); + data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL_2, data); + WREG32(mmCG_CLKPIN_CNTL_2, data); - orig = data = RREG32(MPLL_BYPASSCLK_SEL); - data &= ~MPLL_CLKOUT_SEL_MASK; - data |= MPLL_CLKOUT_SEL(4); + orig = data = RREG32(mmMPLL_BYPASSCLK_SEL); + data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK; + data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT; if (orig != data) - WREG32(MPLL_BYPASSCLK_SEL, data); + WREG32(mmMPLL_BYPASSCLK_SEL, data); - orig = data = RREG32(SPLL_CNTL_MODE); - data &= ~SPLL_REFCLK_SEL_MASK; + orig = data = RREG32(mmSPLL_CNTL_MODE); + data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK; if (orig != data) - WREG32(SPLL_CNTL_MODE, data); + WREG32(mmSPLL_CNTL_MODE, data); } } } else { if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } - orig = data = RREG32_PCIE(PCIE_CNTL2); - data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + orig = data = RREG32_PCIE(ixPCIE_CNTL2); + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; if (orig != data) - WREG32_PCIE(PCIE_CNTL2, data); + WREG32_PCIE(ixPCIE_CNTL2, data); if (!disable_l0s) { - data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { - data = 
RREG32_PCIE(PCIE_LC_STATUS1); - if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~LC_L0S_INACTIVITY_MASK; + data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) { + data = RREG32_PCIE(ixPCIE_LC_STATUS1); + if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) { + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } } } -- GitLab From 060708d1fa2887a7140c4a9a3c719228e50dab7c Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 28 Mar 2025 01:10:18 -0400 Subject: [PATCH 098/899] drm/amdgpu: huge sid.h cleanup, drop substituted defines. Now that we are using the proper defines, cleanup useless old "substituted" defines. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sid.h | 1211 +----------------------------- 1 file changed, 2 insertions(+), 1209 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 2a43ede5dff4f..cd7a531fb3a15 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -24,43 +24,12 @@ #ifndef SI_H #define SI_H -#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 - -#define SI_MAX_SH_GPRS 256 -#define SI_MAX_TEMP_GPRS 16 -#define SI_MAX_SH_THREADS 256 -#define SI_MAX_SH_STACK_ENTRIES 4096 -#define SI_MAX_FRC_EOV_CNT 16384 -#define SI_MAX_BACKENDS 8 -#define SI_MAX_BACKENDS_MASK 0xFF -#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F -#define SI_MAX_SIMDS 12 -#define SI_MAX_SIMDS_MASK 0x0FFF -#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF -#define SI_MAX_PIPES 8 -#define SI_MAX_PIPES_MASK 0xFF -#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F -#define SI_MAX_LDS_NUM 0xFFFF -#define SI_MAX_TCC 16 -#define SI_MAX_TCC_MASK 0xFFFF 
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100 -/* SMC IND accessor regs */ -#define SMC_IND_INDEX_0 0x80 -#define SMC_IND_DATA_0 0x81 - -#define SMC_IND_ACCESS_CNTL 0x8A -# define AUTO_INCREMENT_IND_0 (1 << 0) -#define SMC_MESSAGE_0 0x8B -#define SMC_RESP_0 0x8C - /* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define CG_CGTT_LOCAL_0 0x400 -#define CG_CGTT_LOCAL_1 0x401 - /* SMC IND registers */ #define SMC_SYSCON_RESET_CNTL 0x80000000 # define RST_REG (1 << 0) @@ -68,9 +37,6 @@ # define CK_DISABLE (1 << 0) # define CKEN (1 << 24) -#define VGA_HDP_CONTROL 0xCA -#define VGA_MEMORY_DISABLE (1 << 4) - #define DCCG_DISP_SLOW_SELECT_REG 0x13F #define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0) #define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0) @@ -79,47 +45,6 @@ #define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4) #define DCCG_DISP2_SLOW_SELECT_SHIFT 4 -#define CG_SPLL_FUNC_CNTL 0x180 -#define SPLL_RESET (1 << 0) -#define SPLL_SLEEP (1 << 1) -#define SPLL_BYPASS_EN (1 << 3) -#define SPLL_REF_DIV(x) ((x) << 4) -#define SPLL_REF_DIV_MASK (0x3f << 4) -#define SPLL_PDIV_A(x) ((x) << 20) -#define SPLL_PDIV_A_MASK (0x7f << 20) -#define SPLL_PDIV_A_SHIFT 20 -#define CG_SPLL_FUNC_CNTL_2 0x181 -#define SCLK_MUX_SEL(x) ((x) << 0) -#define SCLK_MUX_SEL_MASK (0x1ff << 0) -#define SPLL_CTLREQ_CHG (1 << 23) -#define SCLK_MUX_UPDATE (1 << 26) -#define CG_SPLL_FUNC_CNTL_3 0x182 -#define SPLL_FB_DIV(x) ((x) << 0) -#define SPLL_FB_DIV_MASK (0x3ffffff << 0) -#define SPLL_FB_DIV_SHIFT 0 -#define SPLL_DITHEN (1 << 28) -#define CG_SPLL_FUNC_CNTL_4 0x183 - -#define SPLL_STATUS 0x185 -#define SPLL_CHG_STATUS (1 << 1) -#define SPLL_CNTL_MODE 0x186 -#define SPLL_SW_DIR_CONTROL (1 << 0) -# define SPLL_REFCLK_SEL(x) ((x) << 26) -# define SPLL_REFCLK_SEL_MASK (3 << 26) - -#define CG_SPLL_SPREAD_SPECTRUM 0x188 -#define SSEN (1 << 0) -#define CLK_S(x) ((x) << 4) -#define CLK_S_MASK (0xfff << 4) -#define CLK_S_SHIFT 4 
-#define CG_SPLL_SPREAD_SPECTRUM_2 0x189 -#define CLK_V(x) ((x) << 0) -#define CLK_V_MASK (0x3ffffff << 0) -#define CLK_V_SHIFT 0 - -#define CG_SPLL_AUTOSCALE_CNTL 0x18b -# define AUTOSCALE_ON_SS_CLEAR (1 << 9) - /* discrete uvd clocks */ #define CG_UPLL_FUNC_CNTL 0x18d # define UPLL_RESET_MASK 0x00000001 @@ -149,308 +74,9 @@ #define CG_UPLL_SPREAD_SPECTRUM 0x194 # define SSEN_MASK 0x00000001 -#define MPLL_BYPASSCLK_SEL 0x197 -# define MPLL_CLKOUT_SEL(x) ((x) << 8) -# define MPLL_CLKOUT_SEL_MASK 0xFF00 - -#define CG_CLKPIN_CNTL 0x198 -# define XTALIN_DIVIDE (1 << 1) -# define BCLK_AS_XCLK (1 << 2) -#define CG_CLKPIN_CNTL_2 0x199 -# define FORCE_BIF_REFCLK_EN (1 << 3) -# define MUX_TCLK_TO_XCLK (1 << 8) - -#define THM_CLK_CNTL 0x19b -# define CMON_CLK_SEL(x) ((x) << 0) -# define CMON_CLK_SEL_MASK 0xFF -# define TMON_CLK_SEL(x) ((x) << 8) -# define TMON_CLK_SEL_MASK 0xFF00 -#define MISC_CLK_CNTL 0x19c -# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) -# define DEEP_SLEEP_CLK_SEL_MASK 0xFF -# define ZCLK_SEL(x) ((x) << 8) -# define ZCLK_SEL_MASK 0xFF00 - -#define CG_THERMAL_CTRL 0x1c0 -#define DPM_EVENT_SRC(x) ((x) << 0) -#define DPM_EVENT_SRC_MASK (7 << 0) -#define DIG_THERM_DPM(x) ((x) << 14) -#define DIG_THERM_DPM_MASK 0x003FC000 -#define DIG_THERM_DPM_SHIFT 14 -#define CG_THERMAL_STATUS 0x1c1 -#define FDO_PWM_DUTY(x) ((x) << 9) -#define FDO_PWM_DUTY_MASK (0xff << 9) -#define FDO_PWM_DUTY_SHIFT 9 -#define CG_THERMAL_INT 0x1c2 -#define DIG_THERM_INTH(x) ((x) << 8) -#define DIG_THERM_INTH_MASK 0x0000FF00 -#define DIG_THERM_INTH_SHIFT 8 -#define DIG_THERM_INTL(x) ((x) << 16) -#define DIG_THERM_INTL_MASK 0x00FF0000 -#define DIG_THERM_INTL_SHIFT 16 -#define THERM_INT_MASK_HIGH (1 << 24) -#define THERM_INT_MASK_LOW (1 << 25) - -#define CG_MULT_THERMAL_CTRL 0x1c4 -#define TEMP_SEL(x) ((x) << 20) -#define TEMP_SEL_MASK (0xff << 20) -#define TEMP_SEL_SHIFT 20 -#define CG_MULT_THERMAL_STATUS 0x1c5 -#define ASIC_MAX_TEMP(x) ((x) << 0) -#define ASIC_MAX_TEMP_MASK 0x000001ff -#define 
ASIC_MAX_TEMP_SHIFT 0 -#define CTF_TEMP(x) ((x) << 9) -#define CTF_TEMP_MASK 0x0003fe00 -#define CTF_TEMP_SHIFT 9 - -#define CG_FDO_CTRL0 0x1d5 -#define FDO_STATIC_DUTY(x) ((x) << 0) -#define FDO_STATIC_DUTY_MASK 0x000000FF -#define FDO_STATIC_DUTY_SHIFT 0 -#define CG_FDO_CTRL1 0x1d6 -#define FMAX_DUTY100(x) ((x) << 0) -#define FMAX_DUTY100_MASK 0x000000FF -#define FMAX_DUTY100_SHIFT 0 -#define CG_FDO_CTRL2 0x1d7 -#define TMIN(x) ((x) << 0) -#define TMIN_MASK 0x000000FF -#define TMIN_SHIFT 0 -#define FDO_PWM_MODE(x) ((x) << 11) -#define FDO_PWM_MODE_MASK (7 << 11) -#define FDO_PWM_MODE_SHIFT 11 -#define TACH_PWM_RESP_RATE(x) ((x) << 25) -#define TACH_PWM_RESP_RATE_MASK (0x7f << 25) -#define TACH_PWM_RESP_RATE_SHIFT 25 - -#define CG_TACH_CTRL 0x1dc -# define EDGE_PER_REV(x) ((x) << 0) -# define EDGE_PER_REV_MASK (0x7 << 0) -# define EDGE_PER_REV_SHIFT 0 -# define TARGET_PERIOD(x) ((x) << 3) -# define TARGET_PERIOD_MASK 0xfffffff8 -# define TARGET_PERIOD_SHIFT 3 -#define CG_TACH_STATUS 0x1dd -# define TACH_PERIOD(x) ((x) << 0) -# define TACH_PERIOD_MASK 0xffffffff -# define TACH_PERIOD_SHIFT 0 - -#define GENERAL_PWRMGT 0x1e0 -# define GLOBAL_PWRMGT_EN (1 << 0) -# define STATIC_PM_EN (1 << 1) -# define THERMAL_PROTECTION_DIS (1 << 2) -# define THERMAL_PROTECTION_TYPE (1 << 3) -# define SW_SMIO_INDEX(x) ((x) << 6) -# define SW_SMIO_INDEX_MASK (1 << 6) -# define SW_SMIO_INDEX_SHIFT 6 -# define VOLT_PWRMGT_EN (1 << 10) -# define DYN_SPREAD_SPECTRUM_EN (1 << 23) -#define CG_TPC 0x1e1 -#define SCLK_PWRMGT_CNTL 0x1e2 -# define SCLK_PWRMGT_OFF (1 << 0) -# define SCLK_LOW_D1 (1 << 1) -# define FIR_RESET (1 << 4) -# define FIR_FORCE_TREND_SEL (1 << 5) -# define FIR_TREND_MODE (1 << 6) -# define DYN_GFX_CLK_OFF_EN (1 << 7) -# define GFX_CLK_FORCE_ON (1 << 8) -# define GFX_CLK_REQUEST_OFF (1 << 9) -# define GFX_CLK_FORCE_OFF (1 << 10) -# define GFX_CLK_OFF_ACPI_D1 (1 << 11) -# define GFX_CLK_OFF_ACPI_D2 (1 << 12) -# define GFX_CLK_OFF_ACPI_D3 (1 << 13) -# define 
DYN_LIGHT_SLEEP_EN (1 << 14) - -#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6 -# define CURRENT_STATE_INDEX_MASK (0xf << 4) -# define CURRENT_STATE_INDEX_SHIFT 4 - -#define CG_FTV 0x1ef - -#define CG_FFCT_0 0x1f0 -# define UTC_0(x) ((x) << 0) -# define UTC_0_MASK (0x3ff << 0) -# define DTC_0(x) ((x) << 10) -# define DTC_0_MASK (0x3ff << 10) - -#define CG_BSP 0x1ff -# define BSP(x) ((x) << 0) -# define BSP_MASK (0xffff << 0) -# define BSU(x) ((x) << 16) -# define BSU_MASK (0xf << 16) -#define CG_AT 0x200 -# define CG_R(x) ((x) << 0) -# define CG_R_MASK (0xffff << 0) -# define CG_L(x) ((x) << 16) -# define CG_L_MASK (0xffff << 16) - -#define CG_GIT 0x201 -# define CG_GICST(x) ((x) << 0) -# define CG_GICST_MASK (0xffff << 0) -# define CG_GIPOT(x) ((x) << 16) -# define CG_GIPOT_MASK (0xffff << 16) - -#define CG_SSP 0x203 -# define SST(x) ((x) << 0) -# define SST_MASK (0xffff << 0) -# define SSTU(x) ((x) << 16) -# define SSTU_MASK (0xf << 16) - -#define CG_DISPLAY_GAP_CNTL 0x20a -# define DISP1_GAP(x) ((x) << 0) -# define DISP1_GAP_MASK (3 << 0) -# define DISP2_GAP(x) ((x) << 2) -# define DISP2_GAP_MASK (3 << 2) -# define VBI_TIMER_COUNT(x) ((x) << 4) -# define VBI_TIMER_COUNT_MASK (0x3fff << 4) -# define VBI_TIMER_UNIT(x) ((x) << 20) -# define VBI_TIMER_UNIT_MASK (7 << 20) -# define DISP1_GAP_MCHG(x) ((x) << 24) -# define DISP1_GAP_MCHG_MASK (3 << 24) -# define DISP2_GAP_MCHG(x) ((x) << 26) -# define DISP2_GAP_MCHG_MASK (3 << 26) - -#define CG_ULV_CONTROL 0x21e -#define CG_ULV_PARAMETER 0x21f - -#define SMC_SCRATCH0 0x221 - -#define CG_CAC_CTRL 0x22e -# define CAC_WINDOW(x) ((x) << 0) -# define CAC_WINDOW_MASK 0x00ffffff - -#define DMIF_ADDR_CONFIG 0x2F5 - -#define DMIF_ADDR_CALC 0x300 - -#define PIPE0_DMIF_BUFFER_CONTROL 0x0328 -# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) -# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) - -#define SRBM_STATUS 0x394 -#define GRBM_RQ_PENDING (1 << 5) -#define VMC_BUSY (1 << 8) -#define MCB_BUSY (1 << 9) -#define 
MCB_NON_DISPLAY_BUSY (1 << 10) -#define MCC_BUSY (1 << 11) -#define MCD_BUSY (1 << 12) -#define SEM_BUSY (1 << 14) -#define IH_BUSY (1 << 17) - -#define SRBM_SOFT_RESET 0x398 -#define SOFT_RESET_BIF (1 << 1) -#define SOFT_RESET_DC (1 << 5) -#define SOFT_RESET_DMA1 (1 << 6) -#define SOFT_RESET_GRBM (1 << 8) -#define SOFT_RESET_HDP (1 << 9) -#define SOFT_RESET_IH (1 << 10) -#define SOFT_RESET_MC (1 << 11) -#define SOFT_RESET_ROM (1 << 14) -#define SOFT_RESET_SEM (1 << 15) -#define SOFT_RESET_VMC (1 << 17) -#define SOFT_RESET_DMA (1 << 20) -#define SOFT_RESET_TST (1 << 21) -#define SOFT_RESET_REGBB (1 << 22) -#define SOFT_RESET_ORB (1 << 23) - -#define CC_SYS_RB_BACKEND_DISABLE 0x3A0 -#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1 - -#define SRBM_READ_ERROR 0x3A6 -#define SRBM_INT_CNTL 0x3A8 -#define SRBM_INT_ACK 0x3AA - -#define SRBM_STATUS2 0x3B1 -#define DMA_BUSY (1 << 5) -#define DMA1_BUSY (1 << 6) - -#define VM_L2_CNTL 0x500 -#define ENABLE_L2_CACHE (1 << 0) -#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) -#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2) -#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4) -#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) -#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10) -#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15) -#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19) -#define VM_L2_CNTL2 0x501 -#define INVALIDATE_ALL_L1_TLBS (1 << 0) -#define INVALIDATE_L2_CACHE (1 << 1) -#define INVALIDATE_CACHE_MODE(x) ((x) << 26) -#define INVALIDATE_PTE_AND_PDE_CACHES 0 -#define INVALIDATE_ONLY_PTE_CACHES 1 -#define INVALIDATE_ONLY_PDE_CACHES 2 -#define VM_L2_CNTL3 0x502 -#define BANK_SELECT(x) ((x) << 0) -#define L2_CACHE_UPDATE_MODE(x) ((x) << 6) -#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15) -#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20) -#define VM_L2_STATUS 0x503 -#define L2_BUSY (1 << 0) -#define VM_CONTEXT0_CNTL 0x504 -#define ENABLE_CONTEXT (1 << 0) -#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) 
-#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) -#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) -#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) -#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) -#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) -#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) -#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) -#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) -#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) -#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) -#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) -#define VM_CONTEXT1_CNTL 0x505 -#define VM_CONTEXT0_CNTL2 0x50C -#define VM_CONTEXT1_CNTL2 0x50D -#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E -#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F -#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510 -#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511 -#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512 -#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513 -#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514 -#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515 - -#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f -#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537 -#define PROTECTIONS_MASK (0xf << 0) -#define PROTECTIONS_SHIFT 0 - /* bit 0: range - * bit 1: pde0 - * bit 2: valid - * bit 3: read - * bit 4: write - */ -#define MEMORY_CLIENT_ID_MASK (0xff << 12) -#define MEMORY_CLIENT_ID_SHIFT 12 -#define MEMORY_CLIENT_RW_MASK (1 << 24) -#define MEMORY_CLIENT_RW_SHIFT 24 -#define FAULT_VMID_MASK (0xf << 25) -#define FAULT_VMID_SHIFT 25 - #define VM_INVALIDATE_REQUEST 0x51E #define VM_INVALIDATE_RESPONSE 0x51F -#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546 -#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547 - -#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F -#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 
0x550 -#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551 -#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552 -#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553 -#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554 -#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555 -#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556 -#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557 -#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558 - -#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F -#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560 - #define VM_L2_CG 0x570 #define MC_CG_ENABLE (1 << 18) #define MC_LS_ENABLE (1 << 19) @@ -491,21 +117,6 @@ #define MC_CITF_MISC_WR_CG 0x993 #define MC_CITF_MISC_VM_CG 0x994 -#define MC_ARB_RAMCFG 0x9D8 -#define NOOFBANK_SHIFT 0 -#define NOOFBANK_MASK 0x00000003 -#define NOOFRANK_SHIFT 2 -#define NOOFRANK_MASK 0x00000004 -#define NOOFROWS_SHIFT 3 -#define NOOFROWS_MASK 0x00000038 -#define NOOFCOLS_SHIFT 6 -#define NOOFCOLS_MASK 0x000000C0 -#define CHANSIZE_SHIFT 8 -#define CHANSIZE_MASK 0x00000100 -#define CHANSIZE_OVERRIDE (1 << 11) -#define NOOFGROUPS_SHIFT 12 -#define NOOFGROUPS_MASK 0x00001000 - #define MC_ARB_DRAM_TIMING 0x9DD #define MC_ARB_DRAM_TIMING2 0x9DE @@ -631,20 +242,6 @@ #define CLKS(x) ((x) << 0) #define CLKS_MASK (0xfff << 0) -#define HDP_HOST_PATH_CNTL 0xB00 -#define CLOCK_GATING_DIS (1 << 23) -#define HDP_NONSURFACE_BASE 0xB01 -#define HDP_NONSURFACE_INFO 0xB02 -#define HDP_NONSURFACE_SIZE 0xB03 - -#define HDP_DEBUG0 0xBCC - -#define HDP_ADDR_CONFIG 0xBD2 -#define HDP_MISC_CNTL 0xBD3 -#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) -#define HDP_MEM_POWER_LS 0xBD4 -#define HDP_LS_ENABLE (1 << 0) - #define ATC_MISC_CG 0xCD4 #define IH_RB_CNTL 0xF80 @@ -674,8 +271,6 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) -#define CONFIG_MEMSIZE 0x150A - #define INTERRUPT_CNTL 0x151A # define IH_DUMMY_RD_OVERRIDE (1 << 0) # define IH_DUMMY_RD_EN (1 << 1) @@ -683,461 +278,22 @@ # define GEN_IH_INT_EN (1 << 8) #define INTERRUPT_CNTL2 0x151B -#define 
HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520 - -#define BIF_FB_EN 0x1524 -#define FB_READ_EN (1 << 0) -#define FB_WRITE_EN (1 << 1) - -#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 - -/* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ -# define MAX_CHANNELS(x) (((x) & 0x7) << 0) -/* max channels minus one. 
7 = 8 channels */ -# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) -# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) -# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ -/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO - * bit0 = 32 kHz - * bit1 = 44.1 kHz - * bit2 = 48 kHz - * bit3 = 88.2 kHz - * bit4 = 96 kHz - * bit5 = 176.4 kHz - * bit6 = 192 kHz - */ - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37 -# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0) -# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8) -/* VIDEO_LIPSYNC, AUDIO_LIPSYNC - * 0 = invalid - * x = legal delay value - * 255 = sync not supported - */ -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38 -# define HBR_CAPABLE (1 << 0) /* enabled by default */ - -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a -# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0) -# define PRODUCT_ID(x) (((x) & 0xffff) << 16) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b -# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c -# define PORT_ID0(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d -# define PORT_ID1(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e -# define DESCRIPTION0(x) (((x) & 0xff) << 0) -# define DESCRIPTION1(x) (((x) & 0xff) << 8) -# define DESCRIPTION2(x) (((x) & 0xff) << 16) -# define DESCRIPTION3(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f -# define DESCRIPTION4(x) (((x) & 0xff) << 0) -# define DESCRIPTION5(x) (((x) & 0xff) << 8) -# define DESCRIPTION6(x) (((x) & 0xff) << 16) -# define DESCRIPTION7(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40 -# define DESCRIPTION8(x) (((x) & 0xff) << 0) -# define DESCRIPTION9(x) (((x) & 0xff) << 8) -# define DESCRIPTION10(x) (((x) & 0xff) << 16) -# define DESCRIPTION11(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41 -# define DESCRIPTION12(x) 
(((x) & 0xff) << 0) -# define DESCRIPTION13(x) (((x) & 0xff) << 8) -# define DESCRIPTION14(x) (((x) & 0xff) << 16) -# define DESCRIPTION15(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42 -# define DESCRIPTION16(x) (((x) & 0xff) << 0) -# define DESCRIPTION17(x) (((x) & 0xff) << 8) - -#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54 -# define AUDIO_ENABLED (1 << 31) - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 -#define PORT_CONNECTIVITY_MASK (3 << 30) -#define PORT_CONNECTIVITY_SHIFT 30 - -#define PRIORITY_A_CNT 0x1AC6 -#define PRIORITY_MARK_MASK 0x7fff -#define PRIORITY_OFF (1 << 16) -#define PRIORITY_ALWAYS_ON (1 << 20) -#define PRIORITY_B_CNT 0x1AC7 - -#define DPG_PIPE_LATENCY_CONTROL 0x1B33 -# define LATENCY_LOW_WATERMARK(x) ((x) << 0) -# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) - -#define DISP_INTERRUPT_STATUS 0x183D -# define LB_D1_VLINE_INTERRUPT (1 << 2) -# define LB_D1_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD1_INTERRUPT (1 << 17) -# define DC_HPD1_RX_INTERRUPT (1 << 18) -# define DACA_AUTODETECT_INTERRUPT (1 << 22) -# define DACB_AUTODETECT_INTERRUPT (1 << 23) -# define DC_I2C_SW_DONE_INTERRUPT (1 << 24) -# define DC_I2C_HW_DONE_INTERRUPT (1 << 25) -#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E -# define LB_D2_VLINE_INTERRUPT (1 << 2) -# define LB_D2_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD2_INTERRUPT (1 << 17) -# define DC_HPD2_RX_INTERRUPT (1 << 18) -# define DISP_TIMER_INTERRUPT (1 << 24) -#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F -# define LB_D3_VLINE_INTERRUPT (1 << 2) -# define LB_D3_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD3_INTERRUPT (1 << 17) -# define DC_HPD3_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840 -# define LB_D4_VLINE_INTERRUPT (1 << 2) -# define LB_D4_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD4_INTERRUPT (1 << 17) -# define DC_HPD4_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853 -# define LB_D5_VLINE_INTERRUPT (1 << 2) -# 
define LB_D5_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD5_INTERRUPT (1 << 17) -# define DC_HPD5_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854 -# define LB_D6_VLINE_INTERRUPT (1 << 2) -# define LB_D6_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD6_INTERRUPT (1 << 17) -# define DC_HPD6_RX_INTERRUPT (1 << 18) - -/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ -#define GRPH_INT_STATUS 0x1A16 -# define GRPH_PFLIP_INT_OCCURRED (1 << 0) -# define GRPH_PFLIP_INT_CLEAR (1 << 8) -/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ -#define GRPH_INT_CONTROL 0x1A17 -# define GRPH_PFLIP_INT_MASK (1 << 0) -# define GRPH_PFLIP_INT_TYPE (1 << 8) - -#define DAC_AUTODETECT_INT_CONTROL 0x19F2 - -#define DC_HPD1_INT_STATUS 0x1807 -#define DC_HPD2_INT_STATUS 0x180A -#define DC_HPD3_INT_STATUS 0x180D -#define DC_HPD4_INT_STATUS 0x1810 -#define DC_HPD5_INT_STATUS 0x1813 -#define DC_HPD6_INT_STATUS 0x1816 -# define DC_HPDx_INT_STATUS (1 << 0) -# define DC_HPDx_SENSE (1 << 1) -# define DC_HPDx_RX_INT_STATUS (1 << 8) - -#define DC_HPD1_INT_CONTROL 0x1808 -#define DC_HPD2_INT_CONTROL 0x180B -#define DC_HPD3_INT_CONTROL 0x180E -#define DC_HPD4_INT_CONTROL 0x1811 -#define DC_HPD5_INT_CONTROL 0x1814 -#define DC_HPD6_INT_CONTROL 0x1817 -# define DC_HPDx_INT_ACK (1 << 0) -# define DC_HPDx_INT_POLARITY (1 << 8) -# define DC_HPDx_INT_EN (1 << 16) -# define DC_HPDx_RX_INT_ACK (1 << 20) -# define DC_HPDx_RX_INT_EN (1 << 24) - -#define DC_HPD1_CONTROL 0x1809 -#define DC_HPD2_CONTROL 0x180C -#define DC_HPD3_CONTROL 0x180F -#define DC_HPD4_CONTROL 0x1812 -#define DC_HPD5_CONTROL 0x1815 -#define DC_HPD6_CONTROL 0x1818 -# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0) -# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) -# define DC_HPDx_EN (1 << 28) - -#define DPG_PIPE_STUTTER_CONTROL 0x1B35 -# define STUTTER_ENABLE (1 << 0) - -/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ -#define CRTC_STATUS_FRAME_COUNT 0x1BA6 - -/* Audio clocks */ -#define DCCG_AUDIO_DTO0_PHASE 
0x05b0 -#define DCCG_AUDIO_DTO0_MODULE 0x05b4 -#define DCCG_AUDIO_DTO1_PHASE 0x05c0 -#define DCCG_AUDIO_DTO1_MODULE 0x05c4 - -#define GRBM_CNTL 0x2000 -#define GRBM_READ_TIMEOUT(x) ((x) << 0) - -#define GRBM_STATUS2 0x2002 -#define RLC_RQ_PENDING (1 << 0) -#define RLC_BUSY (1 << 8) -#define TC_BUSY (1 << 9) - -#define GRBM_STATUS 0x2004 -#define CMDFIFO_AVAIL_MASK 0x0000000F -#define RING2_RQ_PENDING (1 << 4) -#define SRBM_RQ_PENDING (1 << 5) -#define RING1_RQ_PENDING (1 << 6) -#define CF_RQ_PENDING (1 << 7) -#define PF_RQ_PENDING (1 << 8) -#define GDS_DMA_RQ_PENDING (1 << 9) -#define GRBM_EE_BUSY (1 << 10) -#define DB_CLEAN (1 << 12) -#define CB_CLEAN (1 << 13) -#define TA_BUSY (1 << 14) -#define GDS_BUSY (1 << 15) -#define VGT_BUSY (1 << 17) -#define IA_BUSY_NO_DMA (1 << 18) -#define IA_BUSY (1 << 19) -#define SX_BUSY (1 << 20) -#define SPI_BUSY (1 << 22) -#define BCI_BUSY (1 << 23) -#define SC_BUSY (1 << 24) -#define PA_BUSY (1 << 25) -#define DB_BUSY (1 << 26) -#define CP_COHERENCY_BUSY (1 << 28) -#define CP_BUSY (1 << 29) -#define CB_BUSY (1 << 30) -#define GUI_ACTIVE (1 << 31) -#define GRBM_STATUS_SE0 0x2005 -#define GRBM_STATUS_SE1 0x2006 -#define SE_DB_CLEAN (1 << 1) -#define SE_CB_CLEAN (1 << 2) -#define SE_BCI_BUSY (1 << 22) -#define SE_VGT_BUSY (1 << 23) -#define SE_PA_BUSY (1 << 24) -#define SE_TA_BUSY (1 << 25) -#define SE_SX_BUSY (1 << 26) -#define SE_SPI_BUSY (1 << 27) -#define SE_SC_BUSY (1 << 29) -#define SE_DB_BUSY (1 << 30) -#define SE_CB_BUSY (1 << 31) - -#define GRBM_INT_CNTL 0x2018 -# define RDERR_INT_ENABLE (1 << 0) -# define GUI_IDLE_INT_ENABLE (1 << 19) - -#define CP_STRMOUT_CNTL 0x213F -#define SCRATCH_REG0 0x2140 -#define SCRATCH_REG1 0x2141 -#define SCRATCH_REG2 0x2142 -#define SCRATCH_REG3 0x2143 -#define SCRATCH_REG4 0x2144 -#define SCRATCH_REG5 0x2145 -#define SCRATCH_REG6 0x2146 -#define SCRATCH_REG7 0x2147 - -#define SCRATCH_UMSK 0x2150 -#define SCRATCH_ADDR 0x2151 - -#define CP_SEM_WAIT_TIMER 0x216F - -#define 
CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172 - -#define CP_ME_CNTL 0x21B6 -#define CP_CE_HALT (1 << 24) -#define CP_PFP_HALT (1 << 26) -#define CP_ME_HALT (1 << 28) - -#define CP_COHER_CNTL2 0x217A - -#define CP_RB2_RPTR 0x21BE -#define CP_RB1_RPTR 0x21BF -#define CP_RB0_RPTR 0x21C0 -#define CP_RB_WPTR_DELAY 0x21C1 - -#define CP_QUEUE_THRESHOLDS 0x21D8 -#define ROQ_IB1_START(x) ((x) << 0) -#define ROQ_IB2_START(x) ((x) << 8) -#define CP_MEQ_THRESHOLDS 0x21D9 -#define MEQ1_START(x) ((x) << 0) -#define MEQ2_START(x) ((x) << 8) - -#define CP_PERFMON_CNTL 0x21FF - #define VGT_VTX_VECT_EJECT_REG 0x222C - #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 - #define VGT_GS_VERTEX_REUSE 0x2235 - #define VGT_PRIMITIVE_TYPE 0x2256 #define VGT_INDEX_TYPE 0x2257 - #define VGT_NUM_INDICES 0x225C #define VGT_NUM_INSTANCES 0x225D - #define VGT_TF_RING_SIZE 0x2262 - #define VGT_HS_OFFCHIP_PARAM 0x226C - #define VGT_TF_MEMORY_BASE 0x226E -#define PA_CL_ENHANCE 0x2285 -#define CLIP_VTX_REORDER_ENA (1 << 0) -#define NUM_CLIP_SEQ(x) ((x) << 1) - -#define PA_SU_LINE_STIPPLE_VALUE 0x2298 - -#define PA_SC_LINE_STIPPLE_STATE 0x22C4 - -#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9 -#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) -#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) - -#define PA_SC_FIFO_SIZE 0x22F3 -#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) -#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) -#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) -#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) - #define PA_SC_ENHANCE 0x22FC -#define SQ_CONFIG 0x2300 - -#define SQC_CACHES 0x2302 - -#define SQ_POWER_THROTTLE 0x2396 -#define MIN_POWER(x) ((x) << 0) -#define MIN_POWER_MASK (0x3fff << 0) -#define MIN_POWER_SHIFT 0 -#define MAX_POWER(x) ((x) << 16) -#define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 -#define SQ_POWER_THROTTLE2 0x2397 -#define MAX_POWER_DELTA(x) ((x) << 0) -#define MAX_POWER_DELTA_MASK (0x3fff << 0) -#define MAX_POWER_DELTA_SHIFT 0 -#define STI_SIZE(x) ((x) << 16) 
-#define STI_SIZE_MASK (0x3ff << 16) -#define STI_SIZE_SHIFT 16 -#define LTI_RATIO(x) ((x) << 27) -#define LTI_RATIO_MASK (0xf << 27) -#define LTI_RATIO_SHIFT 27 - -#define SX_DEBUG_1 0x2418 - -#define SPI_STATIC_THREAD_MGMT_1 0x2438 -#define SPI_STATIC_THREAD_MGMT_2 0x2439 -#define SPI_STATIC_THREAD_MGMT_3 0x243A -#define SPI_PS_MAX_WAVE_ID 0x243B - -#define SPI_CONFIG_CNTL 0x2440 - -#define SPI_CONFIG_CNTL_1 0x244F -#define VTX_DONE_DELAY(x) ((x) << 0) -#define INTERP_ONE_PRIM_PER_ROW (1 << 4) - -#define CGTS_TCC_DISABLE 0x2452 -#define CGTS_USER_TCC_DISABLE 0x2453 -#define TCC_DISABLE_MASK 0xFFFF0000 -#define TCC_DISABLE_SHIFT 16 -#define CGTS_SM_CTRL_REG 0x2454 -#define OVERRIDE (1 << 21) -#define LS_OVERRIDE (1 << 22) - -#define SPI_LB_CU_MASK 0x24D5 - #define TA_CNTL_AUX 0x2542 -#define CC_RB_BACKEND_DISABLE 0x263D -#define BACKEND_DISABLE(x) ((x) << 16) -#define GB_ADDR_CONFIG 0x263E -#define NUM_PIPES(x) ((x) << 0) -#define NUM_PIPES_MASK 0x00000007 -#define NUM_PIPES_SHIFT 0 -#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) -#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 -#define PIPE_INTERLEAVE_SIZE_SHIFT 4 -#define NUM_SHADER_ENGINES(x) ((x) << 12) -#define NUM_SHADER_ENGINES_MASK 0x00003000 -#define NUM_SHADER_ENGINES_SHIFT 12 -#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) -#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 -#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 -#define NUM_GPUS(x) ((x) << 20) -#define NUM_GPUS_MASK 0x00700000 -#define NUM_GPUS_SHIFT 20 -#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) -#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 -#define MULTI_GPU_TILE_SIZE_SHIFT 24 -#define ROW_SIZE(x) ((x) << 28) -#define ROW_SIZE_MASK 0x30000000 -#define ROW_SIZE_SHIFT 28 - -#define CB_PERFCOUNTER0_SELECT0 0x2688 -#define CB_PERFCOUNTER0_SELECT1 0x2689 -#define CB_PERFCOUNTER1_SELECT0 0x268A -#define CB_PERFCOUNTER1_SELECT1 0x268B -#define CB_PERFCOUNTER2_SELECT0 0x268C -#define CB_PERFCOUNTER2_SELECT1 0x268D -#define CB_PERFCOUNTER3_SELECT0 0x268E -#define 
CB_PERFCOUNTER3_SELECT1 0x268F - -#define CB_CGTT_SCLK_CTRL 0x2698 - -#define TCP_CHAN_STEER_LO 0x2B03 -#define TCP_CHAN_STEER_HI 0x2B94 - -#define CP_RB0_BASE 0x3040 -#define CP_RB0_CNTL 0x3041 -#define RB_BUFSZ(x) ((x) << 0) -#define RB_BLKSZ(x) ((x) << 8) -#define BUF_SWAP_32BIT (2 << 16) -#define RB_NO_UPDATE (1 << 27) -#define RB_RPTR_WR_ENA (1 << 31) - -#define CP_RB0_RPTR_ADDR 0x3043 -#define CP_RB0_RPTR_ADDR_HI 0x3044 -#define CP_RB0_WPTR 0x3045 - -#define CP_PFP_UCODE_ADDR 0x3054 -#define CP_PFP_UCODE_DATA 0x3055 -#define CP_ME_RAM_RADDR 0x3056 -#define CP_ME_RAM_WADDR 0x3057 -#define CP_ME_RAM_DATA 0x3058 - -#define CP_CE_UCODE_ADDR 0x305A -#define CP_CE_UCODE_DATA 0x305B - -#define CP_RB1_BASE 0x3060 -#define CP_RB1_CNTL 0x3061 -#define CP_RB1_RPTR_ADDR 0x3062 -#define CP_RB1_RPTR_ADDR_HI 0x3063 -#define CP_RB1_WPTR 0x3064 -#define CP_RB2_BASE 0x3065 -#define CP_RB2_CNTL 0x3066 -#define CP_RB2_RPTR_ADDR 0x3067 -#define CP_RB2_RPTR_ADDR_HI 0x3068 -#define CP_RB2_WPTR 0x3069 -#define CP_INT_CNTL_RING0 0x306A -#define CP_INT_CNTL_RING1 0x306B -#define CP_INT_CNTL_RING2 0x306C -# define CNTX_BUSY_INT_ENABLE (1 << 19) -# define CNTX_EMPTY_INT_ENABLE (1 << 20) -# define WAIT_MEM_SEM_INT_ENABLE (1 << 21) -# define TIME_STAMP_INT_ENABLE (1 << 26) -# define CP_RINGID2_INT_ENABLE (1 << 29) -# define CP_RINGID1_INT_ENABLE (1 << 30) -# define CP_RINGID0_INT_ENABLE (1 << 31) -#define CP_INT_STATUS_RING0 0x306D -#define CP_INT_STATUS_RING1 0x306E -#define CP_INT_STATUS_RING2 0x306F -# define WAIT_MEM_SEM_INT_STAT (1 << 21) -# define TIME_STAMP_INT_STAT (1 << 26) -# define CP_RINGID2_INT_STAT (1 << 29) -# define CP_RINGID1_INT_STAT (1 << 30) -# define CP_RINGID0_INT_STAT (1 << 31) - // #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) @@ -1160,171 +316,14 @@ # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) -/* PIF PHY0 registers idx/data 0x8/0xc */ -#define PB0_PIF_CNTL 0x10 -# define LS2_EXIT_TIME(x) 
((x) << 17) -# define LS2_EXIT_TIME_MASK (0x7 << 17) -# define LS2_EXIT_TIME_SHIFT 17 -#define PB0_PIF_PAIRING 0x11 -# define MULTI_PIF (1 << 25) -#define PB0_PIF_PWRDOWN_0 0x12 -# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 -# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_0_SHIFT 24 -#define PB0_PIF_PWRDOWN_1 0x13 -# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 -# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_1_SHIFT 24 - -#define PB0_PIF_PWRDOWN_2 0x17 -# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10 -# define PLL_RAMP_UP_TIME_2(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_2_SHIFT 24 -#define PB0_PIF_PWRDOWN_3 0x18 -# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10 -# define PLL_RAMP_UP_TIME_3(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_3_SHIFT 24 -/* PIF PHY1 registers idx/data 0x10/0x14 
*/ -#define PB1_PIF_CNTL 0x10 -#define PB1_PIF_PAIRING 0x11 -#define PB1_PIF_PWRDOWN_0 0x12 -#define PB1_PIF_PWRDOWN_1 0x13 - -#define PB1_PIF_PWRDOWN_2 0x17 -#define PB1_PIF_PWRDOWN_3 0x18 -/* PCIE registers idx/data 0x30/0x34 */ -#define PCIE_CNTL2 0x1c /* PCIE */ -# define SLV_MEM_LS_EN (1 << 16) -# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) -# define MST_MEM_LS_EN (1 << 18) -# define REPLAY_MEM_LS_EN (1 << 19) -#define PCIE_LC_STATUS1 0x28 /* PCIE */ -# define LC_REVERSE_RCVR (1 << 0) -# define LC_REVERSE_XMIT (1 << 1) -# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) -# define LC_OPERATING_LINK_WIDTH_SHIFT 2 -# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) -# define LC_DETECTED_LINK_WIDTH_SHIFT 5 - -#define PCIE_P_CNTL 0x40 /* PCIE */ -# define P_IGNORE_EDB_ERR (1 << 6) - /* PCIE PORT registers idx/data 0x38/0x3c */ -#define PCIE_LC_CNTL 0xa0 -# define LC_L0S_INACTIVITY(x) ((x) << 8) -# define LC_L0S_INACTIVITY_MASK (0xf << 8) -# define LC_L0S_INACTIVITY_SHIFT 8 -# define LC_L1_INACTIVITY(x) ((x) << 12) -# define LC_L1_INACTIVITY_MASK (0xf << 12) -# define LC_L1_INACTIVITY_SHIFT 12 -# define LC_PMI_TO_L1_DIS (1 << 16) -# define LC_ASPM_TO_L1_DIS (1 << 24) -#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ -# define LC_LINK_WIDTH_SHIFT 0 -# define LC_LINK_WIDTH_MASK 0x7 +// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ # define LC_LINK_WIDTH_X0 0 # define LC_LINK_WIDTH_X1 1 # define LC_LINK_WIDTH_X2 2 # define LC_LINK_WIDTH_X4 3 # define LC_LINK_WIDTH_X8 4 # define LC_LINK_WIDTH_X16 6 -# define LC_LINK_WIDTH_RD_SHIFT 4 -# define LC_LINK_WIDTH_RD_MASK 0x70 -# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) -# define LC_RECONFIG_NOW (1 << 8) -# define LC_RENEGOTIATION_SUPPORT (1 << 9) -# define LC_RENEGOTIATE_EN (1 << 10) -# define LC_SHORT_RECONFIG_EN (1 << 11) -# define LC_UPCONFIGURE_SUPPORT (1 << 12) -# define LC_UPCONFIGURE_DIS (1 << 13) -# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) -# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) -# define 
LC_DYN_LANES_PWR_STATE_SHIFT 21 -#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */ -# define LC_XMIT_N_FTS(x) ((x) << 0) -# define LC_XMIT_N_FTS_MASK (0xff << 0) -# define LC_XMIT_N_FTS_SHIFT 0 -# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) -# define LC_N_FTS_MASK (0xff << 24) -#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */ -# define LC_GEN2_EN_STRAP (1 << 0) -# define LC_GEN3_EN_STRAP (1 << 1) -# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) -# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) -# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 -# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) -# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) -# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) -# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) -# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 -# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ -# define LC_CURRENT_DATA_RATE_SHIFT 13 -# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) -# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) -# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) -# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) -# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) - -#define PCIE_LC_CNTL2 0xb1 -# define LC_ALLOW_PDWN_IN_L1 (1 << 17) -# define LC_ALLOW_PDWN_IN_L23 (1 << 18) - -#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */ -# define LC_GO_TO_RECOVERY (1 << 30) -#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */ -# define LC_REDO_EQ (1 << 5) -# define LC_SET_QUIESCE (1 << 13) - -/* - * UVD - */ -#define UVD_UDEC_ADDR_CONFIG 0x3bd3 -#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4 -#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5 -#define UVD_RBC_RB_RPTR 0x3da4 -#define UVD_RBC_RB_WPTR 0x3da5 -#define UVD_STATUS 0x3daf - -#define UVD_CGC_CTRL 0x3dc2 -# define DCM (1 << 0) -# define CG_DT(x) ((x) << 2) -# define CG_DT_MASK (0xf << 2) -# define CLK_OD(x) ((x) << 6) -# define CLK_OD_MASK (0x1f << 6) - - /* UVD CTX indirect */ -#define 
UVD_CGC_MEM_CTRL 0xC0 -#define UVD_CGC_CTRL2 0xC1 -# define DYN_OR_EN (1 << 0) -# define DYN_RR_EN (1 << 1) -# define G_DIV_ID(x) ((x) << 2) -# define G_DIV_ID_MASK (0x7 << 2) /* * PM4 @@ -1558,12 +557,7 @@ /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ - -#define DMA_IB_RPTR 0x340a -#define DMA_TILING_CONFIG 0x342e - -#define DMA_PG 0x3435 -# define PG_CNTL_ENABLE (1 << 0) +#define SDMA_MAX_INSTANCE 2 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -1669,58 +663,6 @@ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define AMDGPU_MM_INDEX 0x0000 -#define AMDGPU_MM_DATA 0x0001 - -#define VERDE_NUM_CRTC 6 -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -#define EVERGREEN_CRTC_MASTER_EN (1 << 0) -#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define 
EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define EVERGREEN_DATA_FORMAT 0x1ac0 -# define EVERGREEN_INTERLEAVE_EN (1 << 0) - -#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) -#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) -#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) -#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20) - -#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45 -#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845 - -#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 -#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 - -#define R600_D1GRPH_SWAP_CONTROL 0x1843 -#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0) #define AVIVO_D1VGA_CONTROL 0x00cc # define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0) @@ -1739,53 +681,6 @@ # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define FMT_BIT_DEPTH_CONTROL 0x1bf2 -#define FMT_TRUNCATE_EN (1 << 0) -#define FMT_TRUNCATE_DEPTH (1 << 4) -#define FMT_SPATIAL_DITHER_EN (1 << 8) -#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9) -#define FMT_SPATIAL_DITHER_DEPTH (1 << 12) -#define FMT_FRAME_RANDOM_ENABLE (1 << 13) -#define FMT_RGB_RANDOM_ENABLE (1 << 14) -#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15) -#define FMT_TEMPORAL_DITHER_EN (1 << 16) -#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20) -#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21) -#define FMT_TEMPORAL_LEVEL (1 << 24) -#define FMT_TEMPORAL_DITHER_RESET (1 << 25) -#define FMT_25FRC_SEL(x) ((x) << 26) -#define FMT_50FRC_SEL(x) ((x) << 28) -#define FMT_75FRC_SEL(x) ((x) << 30) - -#define EVERGREEN_D3VGA_CONTROL 0xf8 -#define EVERGREEN_D4VGA_CONTROL 0xf9 -#define EVERGREEN_D5VGA_CONTROL 0xfa -#define EVERGREEN_D6VGA_CONTROL 0xfb 
- -#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00 - -#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02 -#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8) - -#define EVERGREEN_GRPH_PITCH 0x1a06 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09 -#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a -#define EVERGREEN_GRPH_X_START 0x1a0b -#define EVERGREEN_GRPH_Y_START 0x1a0c -#define EVERGREEN_GRPH_X_END 0x1a0d -#define EVERGREEN_GRPH_Y_END 0x1a0e -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12 -#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) - -#define EVERGREEN_VIEWPORT_START 0x1b5c -#define EVERGREEN_VIEWPORT_SIZE 0x1b5d -#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 - #define GRPH_ARRAY_LINEAR_GENERAL 0 #define GRPH_ARRAY_LINEAR_ALIGNED 1 #define GRPH_ARRAY_1D_TILED_THIN1 2 @@ -1794,95 +689,6 @@ #define ES_AND_GS_AUTO 3 #define BUF_SWAP_32BIT (2 << 16) -/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ -#define EVERGREEN_CUR_CONTROL 0x1a66 -# define EVERGREEN_CURSOR_EN (1 << 0) -# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8) -# define EVERGREEN_CURSOR_MONO 0 -# define EVERGREEN_CURSOR_24_1 1 -# define EVERGREEN_CURSOR_24_8_PRE_MULT 2 -# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3 -# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16) -# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20) -# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -# define EVERGREEN_CURSOR_URGENT_ALWAYS 0 -# define EVERGREEN_CURSOR_URGENT_1_8 1 -# define EVERGREEN_CURSOR_URGENT_1_4 2 -# define EVERGREEN_CURSOR_URGENT_3_8 3 -# define EVERGREEN_CURSOR_URGENT_1_2 4 -#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67 -# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000 -#define EVERGREEN_CUR_SIZE 0x1a68 
-#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69 -#define EVERGREEN_CUR_POSITION 0x1a6a -#define EVERGREEN_CUR_HOT_SPOT 0x1a6b -#define EVERGREEN_CUR_COLOR1 0x1a6c -#define EVERGREEN_CUR_COLOR2 0x1a6d -#define EVERGREEN_CUR_UPDATE 0x1a6e -# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0) -# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1) -# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) -# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -# define EVERGREEN_CRTC_MASTER_EN (1 << 0) -# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -# define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10 -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4 -#define 
mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000 -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000 -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13 - -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18 - -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7 -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0 - -#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1 -#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0 -#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2 -#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1 - #define GRPH_DEPTH_8BPP 0 #define GRPH_DEPTH_16BPP 1 #define GRPH_DEPTH_32BPP 2 @@ -1981,11 +787,6 @@ # define INPUT_GAMMA_SRGB_24 2 # define INPUT_GAMMA_XVYCC_222 3 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11 
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb - #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 #define MC_SEQ_MISC0__MT__DDR2 0x20000000 @@ -2000,15 +801,9 @@ #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) -#define CONFIG_CNTL 0x1509 -#define CC_DRM_ID_STRAPS 0X1559 #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_MODE 0x342f -#define DMA_BUSY_MASK 0x20 -#define SDMA_MAX_INSTANCE 2 - #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) #define PCIE_PORT_INDEX 0xe @@ -2018,8 +813,6 @@ #define EVERGREEN_PIF_PHY1_INDEX 0x10 #define EVERGREEN_PIF_PHY1_DATA 0x14 -#define MC_VM_FB_OFFSET 0x81a - /* Discrete VCE clocks */ #define CG_VCEPLL_FUNC_CNTL 0xc0030600 #define VCEPLL_RESET_MASK 0x00000001 -- GitLab From ca690c7e211d2c044056ac2b878a22f8c6601820 Mon Sep 17 00:00:00 2001 From: James Flowers Date: Thu, 27 Mar 2025 20:29:02 -0700 Subject: [PATCH 099/899] drm/amd/display: removed unused function Removed unused function mpc401_get_3dlut_fast_load_status. Signed-off-by: James Flowers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 17 ----------------- .../drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c | 11 ----------- .../drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h | 14 -------------- 3 files changed, 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 3a89cc0cffc11..eaef3899da7bd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -967,23 +967,6 @@ struct mpc_funcs { */ void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); - /** - * @get_3dlut_fast_load_status: - * - * Get 3D LUT fast load status and reference them with done, soft_underflow and hard_underflow pointers. - * - * Parameters: - * - [in/out] mpc - MPC context. 
- * - [in] mpcc_id - * - [in/out] done - * - [in/out] soft_underflow - * - [in/out] hard_underflow - * - * Return: - * - * void - */ - void (*get_3dlut_fast_load_status)(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow); /** * @populate_lut: diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index ad67197557ca1..98cf0cbd59ba0 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -47,16 +47,6 @@ void mpc401_update_3dlut_fast_load_select(struct mpc *mpc, int mpcc_id, int hubp REG_SET(MPCC_MCM_3DLUT_FAST_LOAD_SELECT[mpcc_id], 0, MPCC_MCM_3DLUT_FL_SEL, hubp_idx); } -void mpc401_get_3dlut_fast_load_status(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow) -{ - struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); - - REG_GET_3(MPCC_MCM_3DLUT_FAST_LOAD_STATUS[mpcc_id], - MPCC_MCM_3DLUT_FL_DONE, done, - MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, soft_underflow, - MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, hard_underflow); -} - void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id) { struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); @@ -618,7 +608,6 @@ static const struct mpc_funcs dcn401_mpc_funcs = { .set_bg_color = mpc1_set_bg_color, .set_movable_cm_location = mpc401_set_movable_cm_location, .update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select, - .get_3dlut_fast_load_status = mpc401_get_3dlut_fast_load_status, .populate_lut = mpc401_populate_lut, .program_lut_read_write_control = mpc401_program_lut_read_write_control, .program_lut_mode = mpc401_program_lut_mode, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h index ce6fbcf14d7a3..8e35ebc603a9d 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h 
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h @@ -241,23 +241,9 @@ void mpc401_update_3dlut_fast_load_select( int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - void mpc401_update_3dlut_fast_load_select( struct mpc *mpc, int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - #endif -- GitLab From daafa303d19f5522e4c24fbf5c1c981a16df2c2f Mon Sep 17 00:00:00 2001 From: Apurv Mishra Date: Mon, 17 Mar 2025 14:00:38 -0400 Subject: [PATCH 100/899] drm/amdkfd: Drop workaround for GC v9.4.3 revID 0 Remove workaround code for the early engineering samples GC v9.4.3 SOCs with revID 0 Reviewed-by: Amber Lin Signed-off-by: Apurv Mishra Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++------------ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 ----- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 +-- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f4a01704effcf..1878bbacf0a2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2692,6 +2692,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + /* Check for IP version 9.4.3 with A0 hardware */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + !amdgpu_device_get_rev_id(adev)) { + dev_err(adev->dev, "Unsupported A0 hardware\n"); + return -ENODEV; /* device unsupported - no device error */ + } + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && @@ -2704,7 +2711,6 @@ static int 
amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 783e0c3b86b4c..8d3560314e5b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1213,10 +1213,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) - mtype = is_local ? MTYPE_CC : MTYPE_UC; - else - mtype = MTYPE_UC; + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1336,7 +1333,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, mtype_local = MTYPE_CC; *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local); - } else if (adev->rev_id) { + } else { /* MTYPE_UC case */ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); } @@ -2411,13 +2408,6 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gmc.flush_tlb_needs_extra_type_2 = amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) && adev->gmc.xgmi.num_physical_nodes; - /* - * TODO: This workaround is badly documented and had a buggy - * implementation. We should probably verify what we do here. 
- */ - adev->gmc.flush_tlb_needs_extra_type_0 = - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->rev_id == 0; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b9c82be6ce134..bf0854bd55551 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -352,11 +352,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &aldebaran_kfd2kgd; break; case IP_VERSION(9, 4, 3): - gfx_target_version = adev->rev_id >= 1 ? 90402 - : adev->flags & AMD_IS_APU ? 90400 - : 90401; - f2g = &gc_9_4_3_kfd2kgd; - break; case IP_VERSION(9, 4, 4): gfx_target_version = 90402; f2g = &gc_9_4_3_kfd2kgd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 4afff7094cafc..a65c67cf56ff3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -402,7 +402,7 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) { u32 vgpr_size = 0x40000; - if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ gfxv == 90500) @@ -462,7 +462,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) if (gfxv == 80002) /* GFX_VERSION_TONGA */ props->eop_buffer_size = 0x8000; - else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */ props->eop_buffer_size = 4096; else if (gfxv >= 80000) props->eop_buffer_size = 4096; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 100717a98ec11..72be6e152e881 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1245,8 +1245,7 @@ 
svm_range_get_pte_flags(struct kfd_node *node, case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; + mtype_local = AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; -- GitLab From 5df0d6addb7e9b6f71f7162d1253762a5be9138e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 26 Mar 2025 13:28:38 +0530 Subject: [PATCH 101/899] drm/amdgpu: Add basic validation for RAS header If RAS header read from EEPROM is corrupted, it could result in trying to allocate huge memory for reading the records. Add some validation to header fields. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0ea7cfaf3587d..e979a6086178c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1392,17 +1392,33 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); - if (hdr->version >= RAS_TABLE_VER_V2_1) { + switch (hdr->version) { + case RAS_TABLE_VER_V2_1: + case RAS_TABLE_VER_V3: control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; - } else { + break; + case RAS_TABLE_VER_V1: control->ras_num_recs = RAS_NUM_RECS(hdr); control->ras_record_offset = RAS_RECORD_START; control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + break; + default: + dev_err(adev->dev, + "RAS header invalid, unsupported version: %u", + hdr->version); + return -EINVAL; } - control->ras_fri = 
RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + if (control->ras_num_recs > control->ras_max_record_count) { + dev_err(adev->dev, + "RAS header invalid, records in header: %u max allowed :%u", + control->ras_num_recs, control->ras_max_record_count); + return -EINVAL; + } + + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); control->ras_num_mca_recs = 0; control->ras_num_pa_recs = 0; return 0; -- GitLab From d53a64e9ee58c3567955db6147d34f00c90065ed Mon Sep 17 00:00:00 2001 From: Andrey Vatoropin Date: Wed, 2 Apr 2025 14:21:39 +0000 Subject: [PATCH 102/899] drm/amd/display: Remove the redundant NULL check Static analysis shows that pointer "timing" cannot be NULL because it points to the object "struct dc_crtc_timing". Remove the extra NULL check. It is meaningless and harms the readability of the code. Found by Linux Verification Center (linuxtesting.org) with SVACE. Reviewed-by: Alex Hung Signed-off-by: Andrey Vatoropin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index f01ced0150726..f040c22db59bb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -1891,8 +1891,6 @@ static int get_refresh_rate(struct dc_state *context) /* check if refresh rate at least 120hz */ timing = &context->streams[0]->timing; - if (timing == NULL) - return 0; h_v_total = timing->h_total * timing->v_total; if (h_v_total == 0) -- GitLab From 6dee64e765c4c80d128817f519292e249f53a769 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 27 Mar 2025 09:34:15 +0530 Subject: [PATCH 103/899] drm/amdgpu: Fix xgmi v6.4.1 link status reporting Use the right register offsets for getting link status. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 477424472bbee..95231de26cb11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -296,15 +296,27 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) { - const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; - const int xgmi_inst = 2; - u32 link_inst; + const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 }; + const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x11b00070, + 0x12100070 }; + u32 i, n; u64 addr; - link_inst = global_link_num % xgmi_inst; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1); + addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n]; + break; + case IP_VERSION(6, 4, 1): + n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1); + addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n]; + break; + default: + return U32_MAX; + } - addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + - adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + i = global_link_num / n; + addr += adev->asic_funcs->encode_ext_smn_addressing(i); return RREG32_PCIE_EXT(addr); } -- GitLab From 89dab189a20e8ab653b37059536f2d5d73666a9e Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 3 Apr 2025 12:57:09 +0530 Subject: [PATCH 104/899] drm/amdgpu: Fix the comment to avoid warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the below comment warning drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c:541: warning: Function parameter or struct member 'adev' not 
described in 'amdgpu_sdma_register_on_reset_callbacks' Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 529c9696c2f32..807da45958934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -539,6 +539,7 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin /** * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks + * @adev: Pointer to the AMDGPU device * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions * * This function allows KFD and AMDGPU to register their own callbacks for handling -- GitLab From 7bb430f087e1dd50ac870e787f850e19eb1c2be6 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Wed, 19 Mar 2025 12:12:43 +0100 Subject: [PATCH 105/899] drm/amdgpu: Fix typo in DC_DEBUG_MASK kernel-doc Add missing colon in kernel-doc for DC_DEBUG_MASK enum. Signed-off-by: Dominik Kaszewski Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 4c95b885d1d00..c8eccee9b023b 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -366,7 +366,7 @@ enum DC_DEBUG_MASK { DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, /** - * @DC_HDCP_LC_ENABLE_SW_FALLBACK If set, upon HDCP Locality Check FW + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW * path failure, retry using legacy SW path. 
*/ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, -- GitLab From f9fbc338811c8d123f0c05cd699ffa3ae7f08d34 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Sun, 30 Mar 2025 13:54:06 -0500 Subject: [PATCH 106/899] drm/amdgpu: Fix CPER error handling on VFs CPER read will loop infinitely if an error is encountered and the more bit is set. Add error checks to break upon failure. v2: added function pointer checks Suggested-by: Tony Yi Signed-off-by: Victor Skvortsov Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0bb8cbe0dcc05..83f3334b39312 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1323,6 +1323,9 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo { struct amdgpu_virt *virt = &adev->virt; + if (!virt->ops || !virt->ops->req_ras_err_count) + return -EOPNOTSUPP; + /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host * will ignore incoming guest messages. Ratelimit the guest messages to * prevent guest self DOS. 
@@ -1378,14 +1381,16 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, used_size = host_telemetry->header.used_size; if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) - return 0; + return -EINVAL; cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); if (!cper_dump) return -ENOMEM; - if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) { + ret = -EINVAL; goto out; + } *more = cper_dump->more; @@ -1425,7 +1430,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) int ret = 0; uint32_t more = 0; - if (!amdgpu_sriov_ras_cper_en(adev)) + if (!virt->ops || !virt->ops->req_ras_cper_dump) return -EOPNOTSUPP; do { @@ -1434,7 +1439,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) adev, virt->fw_reserve.ras_telemetry, &more); else ret = 0; - } while (more); + } while (more && !ret); return ret; } @@ -1444,6 +1449,9 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) struct amdgpu_virt *virt = &adev->virt; int ret = 0; + if (!amdgpu_sriov_ras_cper_en(adev)) + return -EOPNOTSUPP; + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) && down_read_trylock(&adev->reset_domain->sem)) { mutex_lock(&virt->ras.ras_telemetry_mutex); -- GitLab From 03b979e1025fba1d47cae005022fcdbba140f043 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 24 Mar 2025 12:57:25 -0500 Subject: [PATCH 107/899] drm/amd/display: Optimize custom brightness curve [Why] When BIOS includes a lot of custom brightness data points, walking the entire list can be time consuming. This is most noticed when dragging a power slider. The "higher" values are "slower" to drag around. [How] Move custom brightness calculation loop into a static function. Before starting the loop check the "half way" data point to see how it compares to the input. 
If greater than the half way data point use that as the starting point instead. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f8847cf10dbd0..f1de629c76378 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4826,41 +4826,54 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, - uint32_t brightness) +static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, + uint32_t *brightness) { - unsigned int min, max; u8 prev_signal = 0, prev_lum = 0; + int i = 0; - if (!get_brightness_range(caps, &min, &max)) - return brightness; - - for (int i = 0; i < caps->data_points; i++) { - u8 signal, lum; + if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) + return; - if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) - break; + if (!caps->data_points) + return; - signal = caps->luminance_data[i].input_signal; - lum = caps->luminance_data[i].luminance; + /* choose start to run less interpolation steps */ + if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + i = caps->data_points/2; + do { + u8 signal = caps->luminance_data[i].input_signal; + u8 lum = caps->luminance_data[i].luminance; /* * brightness == signal: luminance is percent numerator * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (brightness < signal) - lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (brightness - prev_signal), - signal - 
prev_signal); - else if (brightness > signal) { + if (*brightness > signal) { prev_signal = signal; prev_lum = lum; + i++; continue; } - brightness = DIV_ROUND_CLOSEST(lum * brightness, 101); - break; - } + if (*brightness < signal) + lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * + (*brightness - prev_signal), + signal - prev_signal); + *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + return; + } while (i < caps->data_points); +} + +static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, + uint32_t brightness) +{ + unsigned int min, max; + + if (!get_brightness_range(caps, &min, &max)) + return brightness; + + convert_custom_brightness(caps, &brightness); // Rescale 0..255 to min..max return min + DIV_ROUND_CLOSEST((max - min) * brightness, -- GitLab From d01a7306e1bec9c02268793f58144e3e42695bf0 Mon Sep 17 00:00:00 2001 From: Kevin Gao Date: Wed, 26 Mar 2025 14:14:05 -0400 Subject: [PATCH 108/899] drm/amd/display: Correct SSC enable detection for DCN351 [Why] Due to very small clock register delta between DCN35 and DCN351, clock spread is being checked on the wrong register for DCN351, causing the display driver to believe that DPREFCLK downspread to be disabled when in some stacks it is enabled. This causes the clock values for audio to be incorrect. [How] Both DCN351 and DCN35 use the same clk_mgr, so we modify the DCN35 function that checks for SSC enable to read CLK6 instead of CLK5 when using DCN351. This allows us to read for DPREFCLK downspread correctly so the clock can properly compensate when setting values. 
Reviewed-by: Charlene Liu Signed-off-by: Kevin Gao Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c | 1 + .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 8 +++++++- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c index 6a6ae618650b6..4607eff07253c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -65,6 +65,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B04B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 142de8938d7c3..bb1ac12a2b095 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -90,6 +90,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B24B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 @@ -116,6 +117,7 @@ #define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L #define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +#define CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L #define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 #undef FN @@ -596,7 +598,11 @@ static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) uint32_t ssc_enable; - ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + if (clk_mgr_base->ctx->dce_version == 
DCN_VERSION_3_51) { + ssc_enable = REG_READ(CLK6_spll_field_8) & CLK6_spll_field_8__spll_ssc_en_MASK; + } else { + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + } return ssc_enable != 0; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 221645c023b50..bac8febad69a5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -199,6 +199,7 @@ enum dentist_divider_range { CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ CLK_SR_DCN35(CLK5_spll_field_8), \ + CLK_SR_DCN35(CLK6_spll_field_8), \ SR(DENTIST_DISPCLK_CNTL), \ #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ @@ -307,7 +308,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK4_ALLOW_DS; uint32_t CLK1_CLK5_ALLOW_DS; uint32_t CLK5_spll_field_8; - + uint32_t CLK6_spll_field_8; }; struct clk_mgr_shift { -- GitLab From ce801e5d6c1bac228bf10f75e8bede4285c58282 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Thu, 27 Mar 2025 20:39:51 +0100 Subject: [PATCH 109/899] drm/amd/display: HDCP Locality check using DMUB Fused IO [Why] HDCP locality check has strict timing requirements, currently broken due to reliance on msleep which does not guarantee accuracy. The PR moves the write-poll-read sequence into DMUB using new generic Fused IO interface, where the timing accuracy is greatly improved. New flow is enabled using DCN resource capability bit (none for now), or using a debug flag. [How] * Extended mod_hdcp_config with new function for requesting DMUB to execute a sequence of fused I2C/AUX commands and synchronously wait until an outbox reply arrives or a timeout expires. * If the timeout expires, send an abort to DMUB. * Update HDCP to use the DMUB for locality check if supported. * Add DC_HDCP_LC_FORCE_FW_ENABLE and DC_HDCP_LC_ENABLE_SW_FALLBACK. 
* Make the first enable new flow regardless of resource capabilities. * Make the second enable fallback to old SW flow. * Clean up makefile source file listings for easier updates. Reviewed-by: Alvin Lee Signed-off-by: Dominik Kaszewski Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 148 ++++++++++++++++-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 16 +- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 56 ++++++- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 ++ drivers/gpu/drm/amd/display/dc/Makefile | 41 +++-- drivers/gpu/drm/amd/display/dc/dc.h | 4 + drivers/gpu/drm/amd/display/dc/dc_fused_io.c | 144 +++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_fused_io.h | 31 ++++ drivers/gpu/drm/amd/display/dc/dm_helpers.h | 8 + drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 2 + .../drm/amd/display/dmub/src/dmub_srv_stat.c | 4 + .../gpu/drm/amd/display/modules/hdcp/hdcp.h | 1 + .../display/modules/hdcp/hdcp2_execution.c | 53 +++++-- .../display/modules/hdcp/hdcp2_transition.c | 48 ++++-- .../drm/amd/display/modules/hdcp/hdcp_ddc.c | 73 +++++++++ .../drm/amd/display/modules/inc/mod_hdcp.h | 38 ++++- 16 files changed, 617 insertions(+), 63 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dc_fused_io.c create mode 100644 drivers/gpu/drm/amd/display/dc/dc_fused_io.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f1de629c76378..40915767265d0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -115,6 +115,8 @@ #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" +static_assert(AMDGPU_DMUB_NOTIFICATION_MAX == DMUB_NOTIFICATION_MAX, "AMDGPU_DMUB_NOTIFICATION_MAX mismatch"); + #define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB); #define FIRMWARE_SIENNA_CICHLID_DMUB 
"amdgpu/sienna_cichlid_dmcub.bin" @@ -749,6 +751,29 @@ static void dmub_aux_setconfig_callback(struct amdgpu_device *adev, complete(&adev->dm.dmub_aux_transfer_done); } +static void dmub_aux_fused_io_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + if (!adev || !notify) { + ASSERT(false); + return; + } + + const struct dmub_cmd_fused_request *req = &notify->fused_request; + const uint8_t ddc_line = req->u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(adev->dm.fused_io)) { + ASSERT(false); + return; + } + + struct fused_io_sync *sync = &adev->dm.fused_io[ddc_line]; + + static_assert(sizeof(*req) <= sizeof(sync->reply_data), "Size mismatch"); + memcpy(sync->reply_data, req, sizeof(*req)); + complete(&sync->replied); +} + /** * dmub_hpd_callback - DMUB HPD interrupt processing callback. * @adev: amdgpu_device pointer @@ -885,6 +910,30 @@ static void dm_handle_hpd_work(struct work_struct *work) } +static const char *dmub_notification_type_str(enum dmub_notification_type e) +{ + switch (e) { + case DMUB_NOTIFICATION_NO_DATA: + return "NO_DATA"; + case DMUB_NOTIFICATION_AUX_REPLY: + return "AUX_REPLY"; + case DMUB_NOTIFICATION_HPD: + return "HPD"; + case DMUB_NOTIFICATION_HPD_IRQ: + return "HPD_IRQ"; + case DMUB_NOTIFICATION_SET_CONFIG_REPLY: + return "SET_CONFIG_REPLY"; + case DMUB_NOTIFICATION_DPIA_NOTIFICATION: + return "DPIA_NOTIFICATION"; + case DMUB_NOTIFICATION_HPD_SENSE_NOTIFY: + return "HPD_SENSE_NOTIFY"; + case DMUB_NOTIFICATION_FUSED_IO: + return "FUSED_IO"; + default: + return ""; + } +} + #define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt @@ -902,15 +951,6 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) struct dmcub_trace_buf_entry entry = { 0 }; u32 count = 0; struct dmub_hpd_work *dmub_hpd_wrk; - static const char *const event_type[] = { - "NO_DATA", - "AUX_REPLY", - "HPD", - "HPD_IRQ", - "SET_CONFIGC_REPLY", - "DPIA_NOTIFICATION", - "HPD_SENSE_NOTIFY", - }; do { if
(dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) { @@ -940,7 +980,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) } if (!dm->dmub_callback[notify.type]) { drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n", - event_type[notify.type]); + dmub_notification_type_str(notify.type)); continue; } if (dm->dmub_thread_offload[notify.type] == true) { @@ -2131,6 +2171,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.using_dml21 = true; } + if (amdgpu_dc_debug_mask & DC_HDCP_LC_FORCE_FW_ENABLE) + adev->dm.dc->debug.hdcp_lc_force_fw_enable = true; + + if (amdgpu_dc_debug_mask & DC_HDCP_LC_ENABLE_SW_FALLBACK) + adev->dm.dc->debug.hdcp_lc_enable_sw_fallback = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ @@ -2213,6 +2259,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub aux callback"); goto error; } + + for (size_t i = 0; i < ARRAY_SIZE(adev->dm.fused_io); i++) + init_completion(&adev->dm.fused_io[i].replied); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO, + dmub_aux_fused_io_callback, false)) { + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub fused io callback"); + goto error; + } /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. * It is expected that DMUB will resend any pending notifications at this point. 
Note * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to @@ -12812,6 +12867,79 @@ out: return ret; } +static void abort_fused_io( + struct dc_context *ctx, + const struct dmub_cmd_fused_request *request +) +{ + union dmub_rb_cmd command = { 0 }; + struct dmub_rb_cmd_fused_io *io = &command.fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_ABORT; + io->header.payload_bytes = sizeof(*io) - sizeof(io->header); + io->request = *request; + dm_execute_dmub_cmd(ctx, &command, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + +static bool execute_fused_io( + struct amdgpu_device *dev, + struct dc_context *ctx, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +) +{ + const uint8_t ddc_line = commands[0].fused_io.request.u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(dev->dm.fused_io)) + return false; + + struct fused_io_sync *sync = &dev->dm.fused_io[ddc_line]; + struct dmub_rb_cmd_fused_io *first = &commands[0].fused_io; + const bool result = dm_execute_dmub_cmd_list(ctx, count, commands, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + && first->header.ret_status + && first->request.status == FUSED_REQUEST_STATUS_SUCCESS; + + if (!result) + return false; + + while (wait_for_completion_timeout(&sync->replied, usecs_to_jiffies(timeout_us))) { + reinit_completion(&sync->replied); + + struct dmub_cmd_fused_request *reply = (struct dmub_cmd_fused_request *) sync->reply_data; + + static_assert(sizeof(*reply) <= sizeof(sync->reply_data), "Size mismatch"); + + if (reply->identifier == first->request.identifier) { + first->request = *reply; + return true; + } + } + + reinit_completion(&sync->replied); + first->request.status = FUSED_REQUEST_STATUS_TIMEOUT; + abort_fused_io(ctx, &first->request); + return false; +} + +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us) +{ + struct amdgpu_display_manager *dm 
= &dev->dm; + + mutex_lock(&dm->dpia_aux_lock); + + const bool result = execute_fused_io(dev, link->ctx, commands, count, timeout_us); + + mutex_unlock(&dm->dpia_aux_lock); + return result; +} + int amdgpu_dm_process_dmub_set_config_sync( struct dc_context *ctx, unsigned int link_index, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 740ff0b1fc130..c8201a7a1ad6c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 7 +#define AMDGPU_DMUB_NOTIFICATION_MAX 8 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 @@ -81,6 +81,7 @@ struct amdgpu_bo; struct dmub_srv; struct dc_plane_state; struct dmub_notification; +struct dmub_cmd_fused_request; struct amd_vsdb_block { unsigned char ieee_id[3]; @@ -637,6 +638,11 @@ struct amdgpu_display_manager { * OEM i2c bus */ struct amdgpu_i2c_adapter *oem_i2c; + + struct fused_io_sync { + struct completion replied; + char reply_data[0x40]; // Cannot include dmub_cmd here + } fused_io[8]; }; enum dsc_clock_force_state { @@ -1016,6 +1022,14 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, struct aux_payload *payload, enum aux_return_code_type *operation_result); +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, struct set_config_cmd_payload *payload, enum set_config_status *operation_result); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c 
index a3e93b2891f0f..2bd8dee1b7c2d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -26,6 +26,7 @@ #include "amdgpu_dm_hdcp.h" #include "amdgpu.h" #include "amdgpu_dm.h" +#include "dc_fused_io.h" #include "dm_helpers.h" #include #include "hdcp_psp.h" @@ -76,6 +77,34 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size) return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size); } +static bool lp_atomic_write_poll_read_i2c( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_i2c(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + +static bool lp_atomic_write_poll_read_aux( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_aux(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size) { struct ta_hdcp_shared_memory *hdcp_cmd; @@ -719,7 +748,10 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer); INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate); - hdcp_work[i].hdcp.config.psp.handle = &adev->psp; + struct mod_hdcp_config *config = &hdcp_work[i].hdcp.config; + struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; + + config->psp.handle = &adev->psp; if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == 
DCN_VERSION_3_15 || @@ -727,12 +759,22 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, dc->ctx->dce_version == DCN_VERSION_3_51 || dc->ctx->dce_version == DCN_VERSION_3_6 || dc->ctx->dce_version == DCN_VERSION_3_16) - hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1; - hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i); - hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd; - hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd; + config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); + + ddc_funcs->write_i2c = lp_write_i2c; + ddc_funcs->read_i2c = lp_read_i2c; + ddc_funcs->write_dpcd = lp_write_dpcd; + ddc_funcs->read_dpcd = lp_read_dpcd; + + config->debug.lc_enable_sw_fallback = dc->debug.hdcp_lc_enable_sw_fallback; + if (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable) { + ddc_funcs->atomic_write_poll_read_i2c = lp_atomic_write_poll_read_i2c; + ddc_funcs->atomic_write_poll_read_aux = lp_atomic_write_poll_read_aux; + } else { + ddc_funcs->atomic_write_poll_read_i2c = NULL; + ddc_funcs->atomic_write_poll_read_aux = NULL; + } memset(hdcp_work[i].aconnector, 0, sizeof(struct amdgpu_dm_connector *) * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da7..62954b351ebda 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -630,6 +630,19 @@ bool dm_helpers_submit_i2c( return result; } +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +) +{ + struct amdgpu_device *dev = ctx->driver_context; + + return amdgpu_dm_execute_fused_io(dev, link, commands, 
count, timeout_us); +} + static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, bool is_write_cmd, unsigned char cmd, diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3e1f5b689718e..3c9ecea7eebc7 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -53,31 +53,30 @@ DC_LIBS += hdcp ifdef CONFIG_DRM_AMD_DC_FP DC_LIBS += sspl -DC_SPL_TRANS += dc_spl_translate.o +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, dc_spl_translate.o) endif AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS))) include $(AMD_DC) -DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o +FILES = +FILES += dc_dmub_srv.o +FILES += dc_edid_parser.o +FILES += dc_fused_io.o +FILES += dc_helper.o +FILES += core/dc.o +FILES += core/dc_debug.o +FILES += core/dc_hw_sequencer.o +FILES += core/dc_link_enc_cfg.o +FILES += core/dc_link_exports.o +FILES += core/dc_resource.o +FILES += core/dc_sink.o +FILES += core/dc_stat.o +FILES += core/dc_state.o +FILES += core/dc_stream.o +FILES += core/dc_surface.o +FILES += core/dc_vm_helper.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, $(FILES)) -DISPLAY_CORE += dc_vm_helper.o - -AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE)) - -AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o) - -AMD_DC_SPL_TRANS = $(addprefix $(AMDDALPATH)/dc/,$(DC_SPL_TRANS)) - -AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE) -AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE) - -DC_DMUB += dc_dmub_srv.o -DC_EDID += dc_edid_parser.o -AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB)) -AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID)) -AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID) - -AMD_DISPLAY_FILES += $(AMD_DC_SPL_TRANS) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h index a24d004f8b573..c989bb9798ddf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -282,6 +282,7 @@ struct dc_caps { bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; + bool fused_io_supported; uint32_t max_otg_num; uint32_t max_cab_allocation_bytes; uint32_t cache_line_size; @@ -903,6 +904,9 @@ struct dc_debug_options { bool voltage_align_fclk; bool disable_min_fclk; + bool hdcp_lc_force_fw_enable; + bool hdcp_lc_enable_sw_fallback; + bool disable_dfs_bypass; bool disable_dpp_power_gate; bool disable_hubp_power_gate; diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.c b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c new file mode 100644 index 0000000000000..fff41b0a0a5a7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dc_fused_io.h" + +#include "dm_helpers.h" +#include "gpio.h" + +static bool op_i2c_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_i2c *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_i2c *loc = &req->u.i2c; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + loc->is_aux = false; + loc->ddc_line = ddc_line; + loc->address = op->address; + loc->offset = op->offset; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool op_aux_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_aux *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_aux *loc = &req->u.aux; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + 
loc->is_aux = true; + loc->ddc_line = ddc_line; + loc->address = op->address; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool atomic_write_poll_read( + struct dc_link *link, + union dmub_rb_cmd commands[3], + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + const uint8_t count = 3; + const uint32_t timeout_per_request_us = 10000; + const uint32_t timeout_per_aux_transaction_us = 10000; + uint64_t timeout_us = 0; + + commands[1].fused_io.request.poll_mask_msb = poll_mask_msb; + commands[1].fused_io.request.timeout_us = poll_timeout_us; + + for (uint8_t i = 0; i < count; i++) { + struct dmub_rb_cmd_fused_io *io = &commands[i].fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_EXECUTE; + io->header.multi_cmd_pending = i != count - 1; + io->header.payload_bytes = sizeof(commands[i].fused_io) - sizeof(io->header); + + timeout_us += timeout_per_request_us + io->request.timeout_us; + if (!io->request.timeout_us && io->request.u.aux.is_aux) + timeout_us += timeout_per_aux_transaction_us * (io->request.u.aux.length / 16); + } + + if (!dm_helpers_execute_fused_io(link->ctx, link, commands, count, timeout_us)) + return false; + + return commands[0].fused_io.request.status == FUSED_REQUEST_STATUS_SUCCESS; +} + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + if (!link) + return false; + + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_i2c_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line) + && op_i2c_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line) + && op_i2c_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line); + + if (!converted) + return false; + + const bool 
result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + if (!link) + return false; + + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_aux_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line) + && op_aux_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line) + && op_aux_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line); + + if (!converted) + return false; + + const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.h b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h new file mode 100644 index 0000000000000..c749172409859 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ */ + +#ifndef __DC_FUSED_IO_H__ +#define __DC_FUSED_IO_H__ + +#include "dc.h" +#include "mod_hdcp.h" + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +#endif // __DC_FUSED_IO_H__ + diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 5efddd48d5c50..9d160b39e8c5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -153,6 +153,14 @@ bool dm_helpers_submit_i2c( const struct dc_link *link, struct i2c_command *cmd); +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + bool dm_helpers_dp_write_dsc_enable( struct dc_context *ctx, const struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 22615b47d9bfc..440a426b81c18 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -142,6 +142,7 @@ enum dmub_notification_type { DMUB_NOTIFICATION_SET_CONFIG_REPLY, DMUB_NOTIFICATION_DPIA_NOTIFICATION, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + DMUB_NOTIFICATION_FUSED_IO, DMUB_NOTIFICATION_MAX }; @@ -595,6 +596,7 @@ struct dmub_notification { enum dp_hpd_status hpd_status; enum set_config_status sc_status; struct dmub_rb_cmd_hpd_sense_notify_data hpd_sense_notify; + struct dmub_cmd_fused_request fused_request; }; }; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index 1c33857aa513c..567c5b1aeb7ae 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -102,6 +102,10 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, &cmd.hpd_sense_notify.data, sizeof(cmd.hpd_sense_notify.data)); break; + case DMUB_OUT_CMD__FUSED_IO: + notify->type = DMUB_NOTIFICATION_FUSED_IO; + dmub_memcpy(&notify->fused_request, &cmd.fused_io.request, sizeof(cmd.fused_io.request)); + break; default: notify->type = DMUB_NOTIFICATION_NO_DATA; break; diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 55c7d873175ff..a37634942b075 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -386,6 +386,7 @@ enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp); +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp); /* hdcp version helpers */ static inline uint8_t is_dp_hdcp(struct mod_hdcp *hdcp) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 1d41dd58f6bce..bb8ae80b37f85 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -452,21 +452,12 @@ out: return status; } -static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, +static enum mod_hdcp_status locality_check_sw(struct mod_hdcp *hdcp, struct mod_hdcp_event_context *event_ctx, struct mod_hdcp_transition_input_hdcp2 *input) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if
(event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { - event_ctx->unexpected_event = 1; - goto out; - } - - if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, - &input->lc_init_prepare, &status, - hdcp, "lc_init_prepare")) - goto out; if (!mod_hdcp_execute_and_set(mod_hdcp_write_lc_init, &input->lc_init_write, &status, hdcp, "lc_init_write")) @@ -482,6 +473,48 @@ static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, &input->l_prime_read, &status, hdcp, "l_prime_read")) goto out; +out: + return status; +} + +static enum mod_hdcp_status locality_check_fw(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + + if (!mod_hdcp_execute_and_set(mod_hdcp_write_poll_read_lc_fw, + &input->l_prime_read, &status, + hdcp, "l_prime_read")) + goto out; + +out: + return status; +} + +static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !hdcp->connection.link.adjust.hdcp2.force_sw_locality_check; + + if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { + event_ctx->unexpected_event = 1; + goto out; + } + + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, + &input->lc_init_prepare, &status, + hdcp, "lc_init_prepare")) + goto out; + + status = (use_fw ? 
locality_check_fw : locality_check_sw)(hdcp, event_ctx, input); + if (status != MOD_HDCP_STATUS_SUCCESS) + goto out; + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_l_prime, &input->l_prime_validation, &status, hdcp, "l_prime_validation")) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c index c5f6c11de7e5d..89ffb89e19325 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c @@ -184,17 +184,28 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK); break; - case H2_A2_LOCALITY_CHECK: + case H2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && !adjust->hdcp2.force_sw_locality_check; + + /* + * 1A-05: consider disconnection after LC init a failure + * 1A-13-1: consider invalid l' a failure + * 1A-13-2: consider l' timeout a failure + */ if (hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_available_poll != PASS || - input->l_prime_read != PASS) { - /* - * 1A-05: consider disconnection after LC init a failure - * 1A-13-1: consider invalid l' a failure - * 1A-13-2: consider l' timeout a failure - */ + (!use_fw && input->lc_init_write != PASS) || + (!use_fw && input->l_prime_available_poll != PASS)) { + fail_and_restart_in_ms(0, &status, output); + break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + fail_and_restart_in_ms(0, &status, output); break; } else if (input->l_prime_validation != PASS) { @@ -205,6 +216,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, 
H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { @@ -498,12 +510,23 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK); break; - case D2_A2_LOCALITY_CHECK: + case D2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !adjust->hdcp2.force_sw_locality_check; + if (hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_read != PASS) { + (!use_fw && input->lc_init_write != PASS)) { /* 1A-12: consider invalid l' a failure */ + fail_and_restart_in_ms(0, &status, output); + break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + fail_and_restart_in_ms(0, &status, output); break; } else if (input->l_prime_validation != PASS) { @@ -514,6 +537,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 6e064e6ae949f..2e64085791948 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -688,3 +688,76 @@ enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_INVALID_OPERATION; } + +static bool write_stall_read_lc_fw_aux(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + + struct mod_hdcp_atomic_op_aux write = { + 
hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT], + hdcp2->lc_init + 1, + sizeof(hdcp2->lc_init) - 1, + }; + struct mod_hdcp_atomic_op_aux stall = { 0, NULL, 0, }; + struct mod_hdcp_atomic_op_aux read = { + hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime + 1, + sizeof(hdcp2->lc_l_prime) - 1, + }; + + hdcp2->lc_l_prime[0] = HDCP_2_2_LC_SEND_LPRIME; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_aux( + hdcp->config.ddc.handle, + &write, + &stall, + &read, + 16 * 1000, + 0 + ); +} + +static bool write_poll_read_lc_fw_i2c(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + uint8_t expected_rxstatus[2] = { sizeof(hdcp2->lc_l_prime) }; + + hdcp->buf[0] = hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT]; + memmove(&hdcp->buf[1], hdcp2->lc_init, sizeof(hdcp2->lc_init)); + + struct mod_hdcp_atomic_op_i2c write = { + HDCP_I2C_ADDR, + 0, + hdcp->buf, + sizeof(hdcp2->lc_init) + 1, + }; + struct mod_hdcp_atomic_op_i2c poll = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS], + expected_rxstatus, + sizeof(expected_rxstatus), + }; + struct mod_hdcp_atomic_op_i2c read = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime, + sizeof(hdcp2->lc_l_prime), + }; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_i2c( + hdcp->config.ddc.handle, + &write, + &poll, + &read, + 20 * 1000, + 6 + ); +} + +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp) +{ + const bool success = (is_dp_hdcp(hdcp) ? write_stall_read_lc_fw_aux : write_poll_read_lc_fw_i2c)(hdcp); + + return success ? 
MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE; +} + diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index a4d344a4db9e1..c42468bb70ac7 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -133,9 +133,22 @@ enum mod_hdcp_display_disable_option { MOD_HDCP_DISPLAY_DISABLE_ENCRYPTION, }; +struct mod_hdcp_atomic_op_i2c { + uint8_t address; + uint8_t offset; + uint8_t *data; + uint32_t size; +}; + +struct mod_hdcp_atomic_op_aux { + uint32_t address; + uint8_t *data; + uint32_t size; +}; + struct mod_hdcp_ddc { void *handle; - struct { + struct mod_hdcp_ddc_funcs { bool (*read_i2c)(void *handle, uint32_t address, uint8_t offset, @@ -153,6 +166,22 @@ struct mod_hdcp_ddc { uint32_t address, const uint8_t *data, uint32_t size); + bool (*atomic_write_poll_read_i2c)( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); + bool (*atomic_write_poll_read_aux)( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); } funcs; }; @@ -185,7 +214,8 @@ struct mod_hdcp_link_adjustment_hdcp2 { uint8_t force_type : 2; uint8_t force_no_stored_km : 1; uint8_t increase_h_prime_timeout: 1; - uint8_t reserved : 3; + uint8_t force_sw_locality_check : 1; + uint8_t reserved : 2; }; struct mod_hdcp_link_adjustment { @@ -272,6 +302,10 @@ struct mod_hdcp_display_query { struct mod_hdcp_config { struct mod_hdcp_psp psp; struct mod_hdcp_ddc ddc; + struct { + uint8_t lc_enable_sw_fallback : 1; + uint8_t reserved : 7; + } debug; uint8_t index; }; -- GitLab From ef62b92b9d6290cf9bd09699723e6d18dda9a582 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Mar 
2025 15:33:03 -0500 Subject: [PATCH 110/899] drm/amd/display: Adjust all dev_*() messages to drm_*() [Why] dev_*() messages don't show that they are from a driver in drm subsystem. [How] Change all dev_*() messages to drm_*() messages. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++----- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 4 ++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 40915767265d0..b920760108d2c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2503,7 +2503,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } if (r) { - dev_err(adev->dev, "amdgpu_dm: Can't validate firmware \"%s\"\n", + drm_err(adev_to_drm(adev), "amdgpu_dm: Can't validate firmware \"%s\"\n", fw_name_dmcu); amdgpu_ucode_release(&adev->dm.fw_dmcu); return r; @@ -3052,13 +3052,13 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) if (oem_ddc_service) { oem_i2c = create_i2c(oem_ddc_service, true); if (!oem_i2c) { - dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); + drm_info(adev_to_drm(adev), "Failed to create oem i2c adapter data\n"); return -ENOMEM; } r = i2c_add_adapter(&oem_i2c->base); if (r) { - dev_info(adev->dev, "Failed to register oem i2c\n"); + drm_info(adev_to_drm(adev), "Failed to register oem i2c\n"); kfree(oem_i2c); return r; } @@ -3101,7 +3101,7 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) r = dm_oem_i2c_hw_init(adev); if (r) - dev_info(adev->dev, "Failed to add OEM i2c bus\n"); + drm_info(adev_to_drm(adev), "Failed to add OEM i2c bus\n"); return 0; } @@ -5648,7 +5648,7 @@ static int dm_early_init(struct 
amdgpu_ip_block *ip_block) /* if there is no object header, skip DM */ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; - dev_info(adev->dev, "No object header, skipping DM\n"); + drm_info(adev_to_drm(adev), "No object header, skipping DM\n"); return -ENOENT; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 3e0f45f1711c1..b7c6e8d134350 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -948,13 +948,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 0d5fefb0f5917..d9527c05fc878 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -102,13 +102,13 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } -- GitLab From 4321742c394ec369c93dd8860eb420165027d8f5 Mon Sep 17 
00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Mar 2025 16:01:54 -0500 Subject: [PATCH 111/899] drm/amd/display: Move PSR support message into amdgpu_dm [Why] PSR support could vary from the panels connected to one GPU versus another. [How] Move PSR support message into amdgpu_dm which has the scope of the GPU and use that information. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 ++++++++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 8 -------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b920760108d2c..58e758732338a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5417,8 +5417,15 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (amdgpu_dm_set_replay_caps(link, aconnector)) psr_feature_enabled = false; - if (psr_feature_enabled) + if (psr_feature_enabled) { amdgpu_dm_set_psr_caps(link); + drm_info(adev_to_drm(adev), "PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", + link->psr_settings.psr_feature_enabled, + link->psr_settings.psr_version, + link->dpcd_caps.psr_info.psr_version, + link->dpcd_caps.psr_info.psr_dpcd_caps.raw, + link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); + } } } amdgpu_set_panel_orientation(&aconnector->base); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index e140b7a04d724..f984cb0cb8897 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -87,14 +87,6 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) link->psr_settings.psr_feature_enabled = true; } - - DRM_INFO("PSR 
support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", - link->psr_settings.psr_feature_enabled, - link->psr_settings.psr_version, - link->dpcd_caps.psr_info.psr_version, - link->dpcd_caps.psr_info.psr_dpcd_caps.raw, - link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); - } /* -- GitLab From 33056a97ae5e1ff37535d70cbe5dcf1bbc70f20d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 4 Dec 2024 14:27:42 -0600 Subject: [PATCH 112/899] drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm` [Why] A variety of the 3DLUT handling functions check `debug.enable_mem_low_power.bits.cm` both in the caller and function. This is unnecessary overhead. [How] For each of them reduce to just checking just in caller or function. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index abf439e743f23..2d70586cef402 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -790,8 +790,7 @@ static bool dpp3_program_blnd_lut(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_BLNDGAM_CONTROL, 0, CM_BLNDGAM_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_blnd_lut(dpp_base, false); + dpp3_power_on_blnd_lut(dpp_base, false); return false; } @@ -1204,8 +1203,7 @@ static bool dpp3_program_shaper(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_SHAPER_CONTROL, 0, CM_SHAPER_LUT_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_shaper(dpp_base, false); + dpp3_power_on_shaper(dpp_base, false); return false; } @@ -1399,8 +1397,7 @@ static bool dpp3_program_3dlut(struct 
dpp *dpp_base, if (params == NULL) { dpp3_set_3dlut_mode(dpp_base, LUT_BYPASS, false, false); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_hdr3dlut(dpp_base, false); + dpp3_power_on_hdr3dlut(dpp_base, false); return false; } -- GitLab From 556db637c27a463cd6bafa40714ea1245414456a Mon Sep 17 00:00:00 2001 From: Ausef Yousof Date: Fri, 28 Mar 2025 11:06:27 -0400 Subject: [PATCH 113/899] drm/amd/display: wait for updates to latch before locking [why&how] It is possible for an update to acquire otg lock and begin programming while the previous update has not completed and its values have not latched. The correct way to go about this is to wait until the vupdate pulses so we can be sure that previous updates have latched and we can continue with the current update pipe programming, otherwise during consecutive full updates we will have corruption flash on the screen. The corruption flash occurs specifically on configs that require odm combine, and its local to a specific pipe (will not flash across whole screen). This ticket is across the otg slave, but it may also appear across master. 
Reviewed-by: Leo Chen Signed-off-by: Ausef Yousof Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 34 +++++ .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 122 ++++++++++++++++++ .../amd/display/dc/hwss/dcn10/dcn10_hwseq.h | 7 + .../amd/display/dc/hwss/dcn35/dcn35_init.c | 2 + .../amd/display/dc/hwss/dcn351/dcn351_init.c | 2 + .../display/dc/hwss/hw_sequencer_private.h | 2 + .../gpu/drm/amd/display/dc/inc/core_types.h | 4 + drivers/gpu/drm/amd/display/dc/inc/hw/optc.h | 1 + .../amd/display/dc/optc/dcn35/dcn35_optc.c | 1 + 9 files changed, 175 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index be63cc4aca1f5..636999fcaebb7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2104,6 +2104,18 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.enable_accelerated_mode(dc, context); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + //Only delay otg master for a given config + if (resource_is_pipe_type(pipe, OTG_MASTER)) { + //dc_commit_state_no_check is always a full update + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, pipe, false); + break; + } + } + } + if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) dc->hwss.prepare_bandwidth(dc, context); @@ -2168,6 +2180,14 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (dc->hwss.program_front_end_for_ctx) { dc->hwss.interdependent_update_lock(dc, context, true); dc->hwss.program_front_end_for_ctx(dc, context); + + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + 
dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe); + } + } + dc->hwss.interdependent_update_lock(dc, context, false); dc->hwss.post_unlock_program_front_end(dc, context); } @@ -4049,6 +4069,7 @@ static void commit_planes_for_stream(struct dc *dc, &context->res_ctx, stream); ASSERT(top_pipe_to_program != NULL); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4099,6 +4120,9 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type == UPDATE_TYPE_FAST); + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use); @@ -4235,6 +4259,16 @@ static void commit_planes_for_stream(struct dc *dc, } if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) { dc->hwss.program_front_end_for_ctx(dc, context); + + //Pipe busy until some frame and line # + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe && update_type == UPDATE_TYPE_FULL) { + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe_ctx); + } + } + if (dc->debug.validate_dml_output) { for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e34a93b703a7a..f9ee55998b6b2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -94,6 +94,128 @@ static void print_microsec(struct dc_context 
*dc_ctx, us_x10 % frac); } +/* + * Delay until we passed busy-until-point to which we can + * do necessary locking/programming on consecutive full updates + */ +void dcn10_wait_for_pipe_update_if_needed(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only) +{ + struct crtc_position position; + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int vpos, frame_count; + uint32_t vupdate_start, vupdate_end, vblank_start; + unsigned int lines_to_vupdate, us_to_vupdate; + unsigned int us_per_line, us_vupdate; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + if (pipe_ctx->prev_odm_pipe && + pipe_ctx->stream) + return; + + if (!pipe_ctx->wait_is_required) + return; + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + frame_count = tg->funcs->get_frame_count(tg); + + if (frame_count - pipe_ctx->wait_frame_count > 2) + return; + + vblank_start = pipe_ctx->pipe_dlg_param.vblank_start; + + if (vpos >= vupdate_start && vupdate_start >= vblank_start) + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; + else + lines_to_vupdate = vupdate_start - vpos; + + us_per_line = + stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; + us_to_vupdate = lines_to_vupdate * us_per_line; + + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + + us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; + + if (is_surface_update_only && us_to_vupdate + us_vupdate > 200) { + //surface updates come in at high irql + pipe_ctx->wait_is_required = true; + return; + } + + fsleep(us_to_vupdate + us_vupdate); + + 
//clear + pipe_ctx->next_vupdate = 0; + pipe_ctx->wait_frame_count = 0; + pipe_ctx->wait_is_required = false; +} + +/* + * On pipe unlock and programming, indicate pipe will be busy + * until some frame and line (vupdate), this is required for consecutive + * full updates, need to wait for updates + * to latch to try and program the next update + */ +void dcn10_set_wait_for_update_needed_for_pipe(struct dc *dc, struct pipe_ctx *pipe_ctx) +{ + uint32_t vupdate_start, vupdate_end; + struct crtc_position position; + unsigned int vpos, cur_frame; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + struct optc *optc1 = DCN10TG_FROM_TG(tg); + + ASSERT(optc1->max_frame_count != 0); + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + pipe_ctx->next_vupdate = vupdate_start; + + cur_frame = tg->funcs->get_frame_count(tg); + + if (vpos < vupdate_start) { + pipe_ctx->wait_frame_count = cur_frame; + } else { + if (cur_frame + 1 > optc1->max_frame_count) + pipe_ctx->wait_frame_count = cur_frame + 1 - optc1->max_frame_count; + else + pipe_ctx->wait_frame_count = cur_frame + 1; + } + + pipe_ctx->wait_is_required = true; +} + void dcn10_lock_all_pipes(struct dc *dc, struct dc_state *context, bool lock) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h index 42ffd1e1299c8..57d30ea225f2b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h @@ -50,6 +50,13 @@ void dcn10_optimize_bandwidth( void dcn10_prepare_bandwidth( struct dc *dc, struct dc_state *context); +void dcn10_wait_for_pipe_update_if_needed( + struct dc *dc, + 
struct pipe_ctx *pipe_ctx, + bool is_surface_update_only); +void dcn10_set_wait_for_update_needed_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx); void dcn10_pipe_control_lock( struct dc *dc, struct pipe_ctx *pipe, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 6a82a865209cb..a3ccf805bd16a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -168,6 +168,8 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn35_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 902a96940a015..a4e6b6479983f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -162,6 +162,8 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn351_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 22a5d4a03c988..09bc65c2fa23b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -183,6 +183,8 @@ struct hwseq_private_funcs { struct 
dc_cm2_func_luts mcm_luts, bool lut_bank_a); void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx); + void (*wait_for_pipe_update_if_needed)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only); + void (*set_wait_for_update_needed_for_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx); }; struct dce_hwseq { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 338bc240e8036..1ed3461440aad 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -480,6 +480,10 @@ struct pipe_ctx { struct pixel_rate_divider pixel_rate_divider; /* pixels borrowed from hblank to hactive */ uint8_t hblank_borrow; + /* next vupdate */ + uint32_t next_vupdate; + uint32_t wait_frame_count; + bool wait_is_required; }; /* Data used for dynamic link encoder assignment. diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 7f371cbb35cde..0d5a8358a7780 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -68,6 +68,7 @@ struct optc { int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; + uint32_t max_frame_count; }; void optc1_read_otg_state(struct timing_generator *optc, struct dcn_otg_state *s); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index b86fe2b094f81..4cfc6c0fa147f 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -507,6 +507,7 @@ void dcn35_timing_generator_init(struct optc *optc1) optc1->min_v_blank_interlace = 5; optc1->min_h_sync_width = 4; optc1->min_v_sync_width = 1; + optc1->max_frame_count = 0xFFFFFF; dcn35_timing_generator_set_fgcg( optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc); -- GitLab From 
32be4e39f459f3ac9c191569ae8e3731cb82f7ab Mon Sep 17 00:00:00 2001 From: Ausef Yousof Date: Fri, 28 Mar 2025 11:06:50 -0400 Subject: [PATCH 114/899] drm/amd/display: dont disable dtb as dto src during dpms off This fix was previously in 25.20 but was reverted as it was accompanied by other changes that caused a regression. [why&how] Disabling dtb as the dto src during dpms off relies on being able, in the same instance, to also alter the dto src bit to dpref (or not dtb in general), but this was recently changed to only take place in dcn31_program_pix_clk, as that is where we want to perform any dto src changes because tg is off at that point; it is unsafe to do that elsewhere. What this means is that now, instead of disabling dtb as dto src and modifying the source bit, we are left with a configuration for a given tg that specifies dtb as dto src while dtb dto en is simultaneously unset. dcn31_program_pix_clk can rectify this, but it's possible for us to perform some tg-dependent operation that would simply hang, because when we go to enable, say, the crtc, the clk we specify as dto src is "off": the en bit is cleared, the source bit was never changed, and program_pix_clk hasn't been called yet (as a part of dpms on). We can't disable it as dto src during dpms off if we want the luxury of performing tg-dependent operations during dpms off and before dpms on.
Reviewed-by: Yihan Zhu Signed-off-by: Ausef Yousof Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 ----- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 ++++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5656d10368add..778b68ec489e0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1152,7 +1152,6 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); struct dccg *dccg = dc->res_pool->dccg; struct timing_generator *tg = pipe_ctx->stream_res.tg; - struct dtbclk_dto_params dto_params = {0}; int dp_hpo_inst; struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc; struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; @@ -1179,14 +1178,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) link_hwss->reset_stream_encoder(pipe_ctx); if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && dccg) { - dto_params.otg_inst = tg->inst; - dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; if (dccg) { dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - if (dccg && dccg->funcs->set_dtbclk_dto) - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 846c9c51f2d98..959cd2fbb2507 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2806,6 +2806,8 @@ void dcn20_reset_back_end_for_pipe( { struct dc_link *link = pipe_ctx->stream->link; const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + struct dccg *dccg = dc->res_pool->dccg; + struct dtbclk_dto_params dto_params = {0}; DC_LOGGER_INIT(dc->ctx->logger); if (pipe_ctx->stream_res.stream_enc == NULL) { @@ -2866,6 +2868,12 @@ void dcn20_reset_back_end_for_pipe( &pipe_ctx->link_res, pipe_ctx->stream->signal); link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; } + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && dccg) { + dto_params.otg_inst = pipe_ctx->stream_res.tg->inst; + dto_params.timing = &pipe_ctx->stream->timing; + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } } /* -- GitLab From 0d93e821867c2a6edcd15317311dcb99f7a16887 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 26 Mar 2025 17:11:35 +0800 Subject: [PATCH 115/899] drm/amd/display: turn off eDP lcdvdd and backlight if not required [why] A+N configuration, eDP on A-APU is off, extended display active. Resume from s4, eDP's backlight is still on. [how] Turn off inactive eDP backlight and lcdvdd. 
Reviewed-by: Charlene Liu Reviewed-by: Aric Cyr Signed-off-by: Charlene Liu Signed-off-by: Jing Zhou Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++-- .../amd/display/dc/link/protocols/link_edp_panel_control.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 636999fcaebb7..66e23507eb825 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6298,10 +6298,10 @@ void dc_query_current_properties(struct dc *dc, struct dc_current_properties *pr void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link, bool powerOn) { - if (edp_link->connector_signal != SIGNAL_TYPE_EDP) + if (!edp_link || !edp_link->dc || !edp_link->dc->link_srv) return; - if (edp_link->skip_implict_edp_power_control == false) + if (edp_link->connector_signal != SIGNAL_TYPE_EDP) return; edp_link->dc->link_srv->edp_set_panel_power(edp_link, powerOn); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index da74c2b5854f3..f50c4dbc7553c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -393,6 +393,7 @@ void edp_set_panel_power(struct dc_link *link, bool powerOn) // 3. Rx power on dpcd_write_rx_power_ctrl(link, true); + DC_LOG_BACKLIGHT("eDP power and backlight: Power on"); } else { // 3. Rx power off dpcd_write_rx_power_ctrl(link, false); @@ -404,6 +405,7 @@ void edp_set_panel_power(struct dc_link *link, bool powerOn) // 1. 
panel VDD off if (!link->dc->config.edp_no_power_sequencing) link->dc->hwss.edp_power_control(link, false); + DC_LOG_BACKLIGHT("eDP power and backlight: Power off"); } } -- GitLab From 8581214d5e5505a061d0697c405ca263a8a5cd45 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Sun, 30 Mar 2025 15:12:35 -0400 Subject: [PATCH 116/899] drm/amd/display: [FW Promotion] Release 0.1.5.0 Aligning dmub_cmd header with dmu firmware release 0.1.5.0 Signed-off-by: Taimur Hassan Signed-off-by: Roman Li Reviewed-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 4a4e980688933..b11cf41c2d515 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2636,7 +2636,11 @@ enum dp_hpd_type { /** * DP HPD short pulse */ - DP_IRQ + DP_IRQ = 1, + /** + * Failure to acquire DP HPD state + */ + DP_NONE_HPD = 2 }; /** -- GitLab From e3895e8a872ce2da7e504be24aa6cfff351562dc Mon Sep 17 00:00:00 2001 From: Sherry Wang Date: Fri, 28 Mar 2025 13:33:12 +0800 Subject: [PATCH 117/899] drm/amd/display: rename IPS2 entry/exit message [Why&How] Fix the confusing entry/exit message name for IPS2 Reviewed-by: Duncan Ma Signed-off-by: Sherry Wang Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index f6f0e6a33001c..604d256cb47a6 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -84,8 +84,8 @@ #define VBIOSSMC_MSG_AllowZstatesEntry 0x15 #define 
VBIOSSMC_MSG_DisallowZstatesEntry 0x16 #define VBIOSSMC_MSG_SetDtbClk 0x17 -#define VBIOSSMC_MSG_DispPsrEntry 0x18 ///< Display PSR entry, DMU -#define VBIOSSMC_MSG_DispPsrExit 0x19 ///< Display PSR exit, DMU +#define VBIOSSMC_MSG_DispIPS2Entry 0x18 ///< Display IPS2 entry, DMU +#define VBIOSSMC_MSG_DispIPS2Exit 0x19 ///< Display IPS2 exit, DMU #define VBIOSSMC_MSG_DisableLSdma 0x1A ///< Disable LSDMA; only sent by VBIOS #define VBIOSSMC_MSG_DpControllerPhyStatus 0x1B ///< Inform PMFW about the pre conditions for turning SLDO2 on/off . bit[0]==1 precondition is met, bit[1-2] are for DPPHY number #define VBIOSSMC_MSG_QueryIPS2Support 0x1C ///< Return 1: support; else not supported @@ -475,7 +475,7 @@ int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr) retv = dcn35_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_DispPsrExit, + VBIOSSMC_MSG_DispIPS2Exit, 0); smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv); return retv; -- GitLab From dd035239c96ea8dfa4e19513e8f0952a93dd41bf Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Mon, 31 Mar 2025 10:13:01 -0500 Subject: [PATCH 118/899] drm/amd/display: Promote DC to 3.2.328 Summary: * Optimize custom brightness curve * Correct SSC enable detection for DCN351 * Turn off eDP lcdvdd and backlight if not required * Use DMUB Fused IO interface for HDCP * Extend eDP-on-DP1 quirk list Reviewed-by: Zaeem Mohamed Signed-off-by: Taimur Hassan Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c989bb9798ddf..7624b909497ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.327" +#define DC_VER "3.2.328" /** * MAX_SURFACES - 
representative of the upper bound of surfaces that can be piped to a single CRTC -- GitLab From 9f7ce6a9ab95ed0c3a9aacdde219bb02d127cdfc Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Mon, 24 Mar 2025 01:08:50 -0400 Subject: [PATCH 119/899] drm/amd/pm: implement dpm vcn reset function Implement VCN engine reset by sending MSG_ResetVCN on smu 13.0.6. v2: fix format for code and message Reviewed-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 15 +++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 ++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 6 ++++++ .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 15 +++++++++++++++ 7 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 9f5768fa97dd0..740ee2435f540 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -789,6 +789,21 @@ int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn(smu, inst_mask); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 747019ed2bf5c..4445d54f3322f 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -607,5 +607,6 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum 
pp_pm_policy p_type, char *buf); int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index a01b6244d99cd..1da9e1287d2e4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3967,3 +3967,11 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } + +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) + return smu->ppt_funcs->dpm_reset_vcn(smu, inst_mask); + + return 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 27cecf9688ccb..ae89801adeb8b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1396,6 +1396,11 @@ struct pptable_funcs { */ bool (*reset_sdma_is_supported)(struct smu_context *smu); + /** + * @dpm_reset_vcn: message SMU to soft reset vcn instance. + */ + int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); + /** * @get_ecc_table: message SMU to get ECC INFO table.
*/ @@ -1659,6 +1664,7 @@ int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 288b2576432bd..348d06a3200c2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,7 +94,8 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_Message_Count 0x4E +#define PPSMC_MSG_ResetVCN 0x4E +#define PPSMC_Message_Count 0x4F //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index c9dee09395e3b..eefdaa0b5df65 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -277,6 +277,7 @@ __SMU_DUMMY_MAP(MALLPowerController), \ __SMU_DUMMY_MAP(MALLPowerState), \ __SMU_DUMMY_MAP(ResetSDMA), \ + __SMU_DUMMY_MAP(ResetVCN), \ __SMU_DUMMY_MAP(GetStaticMetricsTable), #undef __SMU_DUMMY_MAP diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 8900c94bda0ec..34ffaa0cfeeb0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -176,6 +176,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU 
MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), }; // clang-format on @@ -2941,6 +2942,19 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL); + if (ret) + dev_err(smu->adev->dev, + "failed to send ResetVCN event with mask 0x%x\n", + inst_mask); + return ret; +} + + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3615,6 +3629,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .send_rma_reason = smu_v13_0_6_send_rma_reason, .reset_sdma = smu_v13_0_6_reset_sdma, .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, + .dpm_reset_vcn = smu_v13_0_6_reset_vcn, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) -- GitLab From a267d1686c09690cad24704c8d83dfe31fbc8553 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Mar 2025 13:29:36 -0400 Subject: [PATCH 120/899] drm/amdgpu/gfx9: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. 
Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 62 +++++++++++++++++++++------ 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d725e2e230a3d..c6125abddbb9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), - SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3), + /* packet headers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP) }; static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { @@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP) }; enum ta_ras_gfx_subblock { @@ -7337,9 +7364,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_9[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -7376,9 
+7408,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) soc15_grbm_select(adev, 1 + i, j, k, 0, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_9[reg])); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); } index += reg_count; } -- GitLab From fd4948494dc934ea017bae942ab68d875fcc2ac9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 11:39:20 -0400 Subject: [PATCH 121/899] drm/amdgpu/gfx9.4.3: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 52 ++++++++++++++++++------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 53fbf6ca7cdb9..9db2bde5c59d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), @@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), 
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; struct amdgpu_gfx_ras gfx_v9_4_3_ras; @@ -4558,12 +4563,21 @@ static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_pri "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", xcc_id, i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, - "%-50s \t 0x%08x\n", - gc_cp_reg_list_9_4_3[reg].reg_name, - adev->gfx.ip_dump_compute_queues - [xcc_offset + inst_offset + - reg]); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, + "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + else + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); } inst_offset += reg_count; } @@ -4612,12 +4626,20 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) GET_INST(GC, xcc_id)); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues - [xcc_offset + - inst_offset + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST( - gc_cp_reg_list_9_4_3[reg], - GET_INST(GC, xcc_id))); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = 
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); } inst_offset += reg_count; } -- GitLab From 867cf768cbe3072ff51b4848064311e03bd79604 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 11:30:15 -0400 Subject: [PATCH 122/899] drm/amdgpu/gfx10: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 64 ++++++++++++++++++++------ 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 792c31411ae3b..f5dcb72a6bf5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), @@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { @@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), + /* gfx header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_10_1[] = { @@ -9674,9 +9703,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_10[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -9737,9 +9771,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block) nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_10[reg])); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); } index += reg_count; } -- GitLab From eb15a5d1aef59d1ec3aafe9d9f13953deb9977b6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 11:48:26 -0400 Subject: [PATCH 123/899] drm/amdgpu/gfx11: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. 
Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 59 +++++++++++++++++++++----- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 7a9ab4c4b2f0d..77df4452979bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -177,9 +177,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -230,7 +234,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), +
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { @@ -259,7 +272,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { @@ -6892,9 +6922,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_11[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", +
"regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -6954,9 +6989,13 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_11[reg])); + RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); } index += reg_count; } -- GitLab From 0e2ebfe2761d3b21b4d534721b13952cb1317902 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 11:58:42 -0400 Subject: [PATCH 124/899] drm/amdgpu/gfx12: dump full CP packet header FIFOs In dev core dump, dump the full header fifo for each queue. Each FIFO has 8 entries. 
Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 41 ++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0490e04ac8a57..b4211e6e64860 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -133,11 +133,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -186,7 +189,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 
0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { @@ -215,7 +227,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { -- GitLab From 6dafb5d4c7cdfc8f994e789d050e29e0d5ca6efd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 10:26:25 -0400 Subject: [PATCH 125/899] drm/amdgpu/pm: add workload profile pause helper To be used for display idle optimizations when we want to pause non-default profiles. 
Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2a9606118d899..21dc956b5f35d 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -429,6 +429,7 @@ struct amd_pm_funcs { int (*set_pp_table)(void *handle, const char *buf, size_t size); void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m); int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE type, bool en); + int (*pause_power_profile)(void *handle, bool pause); /* export to amdgpu */ struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx); int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 740ee2435f540..2148c8db5a590 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -377,6 +377,25 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (pp_funcs && pp_funcs->pause_power_profile) { + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->pause_power_profile( + adev->powerplay.pp_handle, pause); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, uint32_t pstate) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 4445d54f3322f..2c3c97587dd5e 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -410,6 +410,8 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en); +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause); int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); -- GitLab From 92e511d1cecc6a8fa7bdfc8657f16ece9ab4d456 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 10:54:56 -0400 Subject: [PATCH 126/899] drm/amdgpu/pm/swsmu: implement pause workload profile Add the callback for implementation for swsmu. Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 36 ++++++++++++++++++- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1da9e1287d2e4..f345c233bc471 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2398,7 +2398,11 @@ static int smu_switch_power_profile(void *handle, smu_power_profile_mode_get(smu, type); else smu_power_profile_mode_put(smu, type); - ret = smu_bump_power_profile_mode(smu, NULL, 0); + /* don't switch the active workload when paused */ + if (smu->pause_workload) + ret = 0; + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); if (ret) { if (enable) smu_power_profile_mode_put(smu, type); @@ -2411,6 +2415,35 @@ static int smu_switch_power_profile(void *handle, return 0; } +static int smu_pause_power_profile(void *handle, + bool pause) +{ + struct smu_context *smu = handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + u32 workload_mask = 1 << PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + int ret; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != 
AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + smu->pause_workload = pause; + + /* force to bootup default profile */ + if (smu->pause_workload && smu->ppt_funcs->set_power_profile_mode) + ret = smu->ppt_funcs->set_power_profile_mode(smu, + workload_mask, + NULL, + 0); + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); + return ret; + } + + return 0; +} + static enum amd_dpm_forced_level smu_get_performance_level(void *handle) { struct smu_context *smu = handle; @@ -3759,6 +3792,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_pp_table = smu_sys_get_pp_table, .set_pp_table = smu_sys_set_pp_table, .switch_power_profile = smu_switch_power_profile, + .pause_power_profile = smu_pause_power_profile, /* export to amdgpu */ .dispatch_tasks = smu_handle_dpm_task, .load_firmware = smu_load_microcode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index ae89801adeb8b..a95a3dd5a895e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -560,6 +560,7 @@ struct smu_context { /* asic agnostic workload mask */ uint32_t workload_mask; + bool pause_workload; /* default/user workload preference */ uint32_t power_profile_mode; uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT]; -- GitLab From b23f81c442ac33af0c808b4bb26333b881669bb7 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Mar 2025 10:34:57 +0800 Subject: [PATCH 127/899] drm/amd/display: pause the workload setting in dm Pause the workload setting in dm when doing idle optimization Reviewed-by: Alex Deucher Signed-off-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 87058271b00cc..e8bdd7f0c4607 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -246,6 +246,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -273,8 +275,15 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); dc_allow_idle_optimizations(dm->dc, true); + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); + } mutex_unlock(&dm->dc_lock); -- GitLab From 906ad451675155380c1dc1881a244ebde8e8df0a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 6 Apr 2025 17:27:24 -0400 Subject: [PATCH 128/899] drm/amdgpu: cancel gfx idle work in device suspend for s0ix This is normally handled in the gfx IP suspend callbacks, but for S0ix, those are skipped because we don't want to touch gfx. So handle it in device suspend. 
Fixes: b9467983b774 ("drm/amdgpu: add dynamic workload profile switching for gfx10") Fixes: 963537ca2325 ("drm/amdgpu: add dynamic workload profile switching for gfx11") Fixes: 5f95a1549555 ("drm/amdgpu: add dynamic workload profile switching for gfx12") Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1878bbacf0a2f..979942f26dda7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3653,6 +3653,13 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev, adev->ip_blocks[i].version->type)) continue; + /* Since we skip suspend for S0i3, we need to cancel the delayed + * idle work here as the suspend callback never gets called. + */ + if (adev->in_s0ix && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) + cancel_delayed_work_sync(&adev->gfx.idle_work); /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware -- GitLab From 795dbde92fe5c6996a02a5b579481de73035e7bf Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 2 Apr 2025 19:20:57 +0200 Subject: [PATCH 129/899] drm/i915/huc: Fix fence not released on early probe errors HuC delayed loading fence, introduced with commit 27536e03271da ("drm/i915/huc: track delayed HuC load with a fence"), is registered with object tracker early on driver probe but unregistered only from driver remove, which is not called on early probe errors. 
Since its memory is allocated under devres, then released anyway, it may happen to be allocated again to the fence and reused on future driver probes, resulting in kernel warnings that taint the kernel: <4> [309.731371] ------------[ cut here ]------------ <3> [309.731373] ODEBUG: init destroyed (active state 0) object: ffff88813d7dd2e0 object type: i915_sw_fence hint: sw_fence_dummy_notify+0x0/0x20 [i915] <4> [309.731575] WARNING: CPU: 2 PID: 3161 at lib/debugobjects.c:612 debug_print_object+0x93/0xf0 ... <4> [309.731693] CPU: 2 UID: 0 PID: 3161 Comm: i915_module_loa Tainted: G U 6.14.0-CI_DRM_16362-gf0fd77956987+ #1 ... <4> [309.731700] RIP: 0010:debug_print_object+0x93/0xf0 ... <4> [309.731728] Call Trace: <4> [309.731730] ... <4> [309.731949] __debug_object_init+0x17b/0x1c0 <4> [309.731957] debug_object_init+0x34/0x50 <4> [309.732126] __i915_sw_fence_init+0x34/0x60 [i915] <4> [309.732256] intel_huc_init_early+0x4b/0x1d0 [i915] <4> [309.732468] intel_uc_init_early+0x61/0x680 [i915] <4> [309.732667] intel_gt_common_init_early+0x105/0x130 [i915] <4> [309.732804] intel_root_gt_init_early+0x63/0x80 [i915] <4> [309.732938] i915_driver_probe+0x1fa/0xeb0 [i915] <4> [309.733075] i915_pci_probe+0xe6/0x220 [i915] <4> [309.733198] local_pci_probe+0x44/0xb0 <4> [309.733203] pci_device_probe+0xf4/0x270 <4> [309.733209] really_probe+0xee/0x3c0 <4> [309.733215] __driver_probe_device+0x8c/0x180 <4> [309.733219] driver_probe_device+0x24/0xd0 <4> [309.733223] __driver_attach+0x10f/0x220 <4> [309.733230] bus_for_each_dev+0x7d/0xe0 <4> [309.733236] driver_attach+0x1e/0x30 <4> [309.733239] bus_add_driver+0x151/0x290 <4> [309.733244] driver_register+0x5e/0x130 <4> [309.733247] __pci_register_driver+0x7d/0x90 <4> [309.733251] i915_pci_register_driver+0x23/0x30 [i915] <4> [309.733413] i915_init+0x34/0x120 [i915] <4> [309.733655] do_one_initcall+0x62/0x3f0 <4> [309.733667] do_init_module+0x97/0x2a0 <4> [309.733671] load_module+0x25ff/0x2890 <4> [309.733688] 
init_module_from_file+0x97/0xe0 <4> [309.733701] idempotent_init_module+0x118/0x330 <4> [309.733711] __x64_sys_finit_module+0x77/0x100 <4> [309.733715] x64_sys_call+0x1f37/0x2650 <4> [309.733719] do_syscall_64+0x91/0x180 <4> [309.733763] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [309.733792] ... <4> [309.733806] ---[ end trace 0000000000000000 ]--- That scenario is most easily reproducible with igt@i915_module_load@reload-with-fault-injection. Fix the issue by moving the cleanup step to driver release path. Fixes: 27536e03271da ("drm/i915/huc: track delayed HuC load with a fence") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13592 Cc: Daniele Ceraolo Spurio Cc: Alan Previn Signed-off-by: Janusz Krzysztofik Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Krzysztof Karas Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250402172057.209924-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 11 +++++------ drivers/gpu/drm/i915/gt/uc/intel_huc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index f30c90650b7ec..9659e6a301e19 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -414,12 +419,6 @@ out: void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off. 
- */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d5e441b9e08d6..921ad4b1687f0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,6 +55,7 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 5b8080ec5315b..4f751ce74214d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** -- GitLab From da7dc714a8f8e1c9fc33c57cd63583779a3bef71 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 20 Mar 2025 12:35:02 +0300 Subject: [PATCH 130/899] drm/amd/pm/smu11: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 1e866f1fe528 ("drm/amd/pm: Prevent divide by zero") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 78391d8f35a9c..25fabf336a640 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1204,7 +1204,7 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; - if (speed == 0) + if (!speed || speed > UINT_MAX/8) return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement -- GitLab From 9e7b08d239c2f21e8f417854f81e5ff40edbebff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Mar 2025 17:46:59 -0400 Subject: [PATCH 131/899] drm/amdgpu/mes12: optimize MES pipe FW version fetching Don't fetch it again if we already have it. It seems the registers don't reliably have the value at resume in some cases. 
Fixes: 785f0f9fe742 ("drm/amdgpu: Add mes v12_0 ip block support (v4)") Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 183dd3346da57..e6ab617b9a404 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1392,17 +1392,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, mes_v12_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); + if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) || + ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) { + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } return 0; } -- GitLab From 160e6f5108f4b629c4614dfd37bb6c16ef522bb4 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 4 Apr 2025 01:22:21 -0400 Subject: [PATCH 132/899] drm/amdgpu: fix typos in DCEs In DCE6, DCE8, DCE10, DCE11, "hdp" is replaced by "hpd" and replace "type" by "hpd" for a uniform parameter naming usage across DCEs. 
In link_factory.c, there is a missing "p" to "types" Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 4 ++-- .../gpu/drm/amd/display/dc/link/link_factory.c | 2 +- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index df401aded6621..bf7c22f81cda3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3075,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3227,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 80f01c3989cdf..47e05783c4a0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3206,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3358,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3488,8 +3488,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .set_powergating_state = dce_v11_0_set_powergating_state, }; -static void -dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, +static void 
dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 2d85ea7159de8..2a48f8bc6b7cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -287,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3007,26 +3007,26 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned hpd, enum amdgpu_interrupt_state state) { u32 dc_hpd_int_cntl; - if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 07358546581fc..fe86715775255 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3029,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, u32 dc_hpd_int_cntl; if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + DRM_DEBUG("invalid hpd %d\n", type); return 0; } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index f6b6b19e74814..70f54bdbbc64e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -653,7 +653,7 @@ static bool construct_phy(struct dc_link *link, } /* Look for device tag that matches connector signal, - * CRT for rgb, LCD for other supported signal tyes + * CRT for rgb, LCD for other supported signal types */ if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios, link->device_tag.dev_id)) -- GitLab From 9101b84f8c1971401b05a9172a44082bc7a158d9 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 4 Apr 2025 01:22:22 -0400 Subject: [PATCH 133/899] drm/amdgpu: use "irq" in place of "interrupt" in DCE6/8 as in DCE10/11 "interrupt" becomes "irq" in: dce_vX_0_set_hpd_interrupt_state() dce_vX_0_set_crtc_interrupt_state() dce_vX_0_set_pageflip_interrupt_state() It is easier when going through the code to just change the DCE number in the functions' name to find and compare them across DCE versions. Also, it standardizes function mapping inside a given structure where .set and .process are both set to functions with a "_irq" suffix. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 2a48f8bc6b7cb..276c025c4c03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -3005,7 +3005,7 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } -static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned hpd, enum amdgpu_interrupt_state state) @@ -3035,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3120,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3535,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { - .set = dce_v6_0_set_crtc_interrupt_state, + .set = dce_v6_0_set_crtc_irq_state, .process = dce_v6_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = { - .set = dce_v6_0_set_pageflip_interrupt_state, + .set = dce_v6_0_set_pageflip_irq_state, .process = dce_v6_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = { - .set = 
dce_v6_0_set_hpd_interrupt_state, + .set = dce_v6_0_set_hpd_irq_state, .process = dce_v6_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index fe86715775255..e62ccf9eb73de 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3021,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } } -static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3051,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3136,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3547,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { - .set = dce_v8_0_set_crtc_interrupt_state, + .set = dce_v8_0_set_crtc_irq_state, .process = dce_v8_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = { - .set = dce_v8_0_set_pageflip_interrupt_state, + .set = dce_v8_0_set_pageflip_irq_state, .process = dce_v8_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = { - .set = dce_v8_0_set_hpd_interrupt_state, + .set = dce_v8_0_set_hpd_irq_state, .process = dce_v8_0_hpd_irq, }; -- GitLab From 
d526b4efb748d439af68be7d1a8922716a0eb52c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 13:49:00 -0400 Subject: [PATCH 134/899] Documentation: update KIQ documentation KIQ is replaced with MES on GFX 11 and newer. Reviewed-by: shaoyun.liu Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/driver-core.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 7e3f5d1e9aaf4..81256318e93cf 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -140,7 +140,8 @@ Some useful constructs: KIQ (Kernel Interface Queue) This is a control queue used by the kernel driver to manage other gfx and compute queues on the GFX/compute engine. You can use it to - map/unmap additional queues, etc. + map/unmap additional queues, etc. This is replaced by MES on + GFX 11 and newer hardware. IB (Indirect Buffer) A command buffer for a particular engine. Rather than writing -- GitLab From 9040e657dc1c243b89e089a2fd83a84e041d9706 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 13:50:10 -0400 Subject: [PATCH 135/899] Documentation: fix typo in debugfs.rst In reference to memory carved out for APUs, s/cave out/carve out/ Reviewed-by: shaoyun.liu Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/debugfs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/amdgpu/debugfs.rst b/Documentation/gpu/amdgpu/debugfs.rst index fe7736a0b43a6..5150d0a956581 100644 --- a/Documentation/gpu/amdgpu/debugfs.rst +++ b/Documentation/gpu/amdgpu/debugfs.rst @@ -14,7 +14,7 @@ amdgpu_benchmark Run benchmarks using the DMA engine the driver uses for GPU memory paging. Write a number to the file to run the test. The results are written to the -kernel log. VRAM is on device memory (dGPUs) or cave out (APUs) and GTT +kernel log.
VRAM is on device memory (dGPUs) or carve out (APUs) and GTT (Graphics Translation Tables) is system memory that is accessible by the GPU. The following tests are available: -- GitLab From 3394069e7de984ea3a4c2c68bf69b65f1c0aa658 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Wed, 2 Apr 2025 17:35:56 -0400 Subject: [PATCH 136/899] drm/amdgpu: Disable ACA on VFs VFs query RAS error counts directly from host with AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY. When ACA is enabled, an unusable aca_sysfs is created rather than amdgpu_ras_sysfs_create() Likewise, VFs depend on host support to query CPERs, rather than ACA component. Signed-off-by: Victor Skvortsov Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 360e07a5c7c1f..5a234eadae8b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -549,7 +549,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) { int r; - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; r = amdgpu_cper_ring_init(adev); @@ -568,7 +568,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) int amdgpu_cper_fini(struct amdgpu_device *adev) { - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; adev->cper.enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ebf1f63d04427..5bb7673fd28ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3794,10 +3794,12 @@ init_ras_enabled_flag: adev->ras_hw_enabled & amdgpu_ras_mask; /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ - 
adev->aca.is_enabled = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + if (!amdgpu_sriov_vf(adev)) { + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + } /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && -- GitLab From 940e772635ac4f8431e82f09ba4023cf31ce249e Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Mon, 24 Mar 2025 01:15:25 -0400 Subject: [PATCH 137/899] amd/amdgpu: Init vcn hardware per instance for vcn 4.0.3 Add interface for hardware init by vcn instance. v2: fix code format Reviewed-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 51 ++++++++++++++----------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69d..a1355a37cef79 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -287,6 +287,31 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst) +{ + int vcn_inst; + struct amdgpu_device *adev = vinst->adev; + struct amdgpu_ring *ring; + int inst_idx = vinst->inst; + + vcn_inst = GET_INST(VCN, inst_idx); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst, + adev->vcn.inst[inst_idx].aid_id); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + 
VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + } + + return 0; +} + /** * vcn_v4_0_3_hw_init - start and test VCN block * @@ -298,7 +323,8 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + struct amdgpu_vcn_inst *vinst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_3_start_sriov(adev); @@ -321,28 +347,9 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); - } + vinst = &adev->vcn.inst[i]; + vcn_v4_0_3_hw_init_inst(vinst); /* Re-init fw_shared when RAS fatal error occurred */ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; -- GitLab From bb00bf17328d80f519df93bebc41f6c9171547d6 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 1 Apr 2025 15:56:41 +0800 Subject: [PATCH 138/899] drm/amd/amdgpu: decouple ASPM with pcie dpm ASPM doesn't need to be disabled if pcie dpm is disabled. So ASPM can be independently enabled.
Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 979942f26dda7..cfaa5d77b20ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1897,8 +1897,6 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; - if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) - return false; return pcie_aspm_enabled(adev->pdev); } -- GitLab From b695dd3bb8e806916ba64a62fae918c60004cc4d Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 3 Apr 2025 11:39:49 +0800 Subject: [PATCH 139/899] drm/amdgpu: add loop bits for NPS2 page retirement Support NPS2 RAS. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 0e404c0749753..da00d6b3b6a38 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -220,6 +220,13 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH5_BIT; + loop_bits[1] = UMC_V12_0_PA_C2_BIT; + loop_bits[2] = UMC_V12_0_PA_B1_BIT; + loop_bits[3] = UMC_V12_0_PA_R12_BIT; + } + if (nps == AMDGPU_NPS4_PARTITION_MODE) { loop_bits[0] = UMC_V12_0_PA_CH4_BIT; loop_bits[1] = UMC_V12_0_PA_CH5_BIT; @@ -517,6 +524,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (adev->gmc.gmc_funcs->query_mem_partition_mode) nps = 
adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + if (nps == AMDGPU_NPS2_PARTITION_MODE) + shift_bit = UMC_V12_0_PA_B1_BIT; if (nps == AMDGPU_NPS4_PARTITION_MODE) shift_bit = UMC_V12_0_PA_B0_BIT; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 9298018d938f7..056bbc0383120 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -65,12 +65,14 @@ /* row bits in SOC physical address */ #define UMC_V12_0_PA_R0_BIT 22 #define UMC_V12_0_PA_R11_BIT 33 +#define UMC_V12_0_PA_R12_BIT 34 #define UMC_V12_0_PA_R13_BIT 35 /* channel bit in SOC physical address */ #define UMC_V12_0_PA_CH4_BIT 12 #define UMC_V12_0_PA_CH5_BIT 13 /* bank bit in SOC physical address */ #define UMC_V12_0_PA_B0_BIT 19 +#define UMC_V12_0_PA_B1_BIT 20 /* row bits in MCA address */ #define UMC_V12_0_MA_R0_BIT 10 -- GitLab From 6ffc6e056febb9ebb0d6a8fe0379ac9d8f4ec4a6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 8 Apr 2025 08:55:33 +0530 Subject: [PATCH 140/899] drm/amdgpu: Reset RAS table if header is invalid If a valid header is not found during RAS eeprom init, consider it as new and reset RAS table info. 
Signed-off-by: Lijo Lazar Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index e979a6086178c..c985d58fdd7dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1392,6 +1392,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); + if (hdr->header != RAS_TABLE_HDR_VAL && + hdr->header != RAS_TABLE_HDR_BAD) { + dev_info(adev->dev, "Creating a new EEPROM table"); + return amdgpu_ras_eeprom_reset_table(control); + } + switch (hdr->version) { case RAS_TABLE_VER_V2_1: case RAS_TABLE_VER_V3: @@ -1429,7 +1435,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - int res; + int res = 0; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1510,10 +1516,6 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) "User defined threshold is set, runtime service will be halt when threshold is reached\n"); } } - } else { - DRM_INFO("Creating a new EEPROM table"); - - res = amdgpu_ras_eeprom_reset_table(control); } return res < 0 ? 
res : 0; -- GitLab From a149f0bd0b71fba3af09ccbc027b4f6e977dfe2e Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 4 Apr 2025 01:42:24 -0400 Subject: [PATCH 141/899] drm/amd/display/dc: reclassify DCE6 resources and hw sequencer Classify DCE6 resource and sequencer as they are for other DCE versions Put dce60_resource.c and .h under amd/display/dc/resource/dce60 Put and rename dce60_hw_sequencer.c and .h under amd/display/dc/hwss/dce60 v2: fix build when CONFIG_DRM_AMD_DC_SI=n (Alex) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce60/Makefile | 3 +-- drivers/gpu/drm/amd/display/dc/hwss/Makefile | 26 +++++++++++++------ .../dce60/dce60_hwseq.c} | 2 +- .../dce60/dce60_hwseq.h} | 0 .../gpu/drm/amd/display/dc/resource/Makefile | 26 +++++++++++++------ .../dc/{ => resource}/dce60/dce60_resource.c | 2 +- .../dc/{ => resource}/dce60/dce60_resource.h | 0 7 files changed, 39 insertions(+), 20 deletions(-) rename drivers/gpu/drm/amd/display/dc/{dce60/dce60_hw_sequencer.c => hwss/dce60/dce60_hwseq.c} (99%) rename drivers/gpu/drm/amd/display/dc/{dce60/dce60_hw_sequencer.h => hwss/dce60/dce60_hwseq.h} (100%) rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce60/dce60_resource.c (99%) rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce60/dce60_resource.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile index eede83ad91fa0..824f73eb33266 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init -DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \ - dce60_resource.o +DCE60 = dce60_timing_generator.o AMD_DAL_DCE60 = $(addprefix $(AMDDALPATH)/dc/dce60/,$(DCE60)) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile index 40ecebea1ba07..bee617ca0838c 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +HWSS_DCE60 = dce60_hwseq.o + +AMD_DAL_HWSS_DCE60 = $(addprefix $(AMDDALPATH)/dc/hwss/dce60/,$(HWSS_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE60) +endif + +############################################################################### + +HWSS_DCE80 = dce80_hwseq.o + +AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) + +############################################################################### + HWSS_DCE = dce_hwseq.o AMD_DAL_HWSS_DCE = $(addprefix $(AMDDALPATH)/dc/hwss/dce/,$(HWSS_DCE)) @@ -65,14 +83,6 @@ AMD_DAL_HWSS_DCE120 = $(addprefix $(AMDDALPATH)/dc/hwss/dce120/,$(HWSS_DCE120)) AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE120) -############################################################################### - -HWSS_DCE80 = dce80_hwseq.o - -AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c rename to drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c index 44b56490e1524..a08e9f9eec176 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c @@ -26,7 +26,7 @@ #include "dm_services.h" #include "dc.h" #include "core_types.h" -#include "dce60_hw_sequencer.h" +#include "dce60_hwseq.h" #include "dce/dce_hwseq.h" #include "dce110/dce110_hwseq.h" diff --git 
a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h rename to drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index b8cddef6b3d2d..5b42da8b79c2e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +RESOURCE_DCE60 = dce60_resource.o + +AMD_DAL_RESOURCE_DCE60 = $(addprefix $(AMDDALPATH)/dc/resource/dce60/,$(RESOURCE_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE60) +endif + +############################################################################### + +RESOURCE_DCE80 = dce80_resource.o + +AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) + +############################################################################### + RESOURCE_DCE100 = dce100_resource.o AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100)) @@ -57,14 +75,6 @@ AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOUR AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120) -############################################################################### - -RESOURCE_DCE80 = dce80_resource.o - -AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c similarity 
index 99% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c rename to drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 889f314cac65c..737c1b1d861a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -48,7 +48,7 @@ #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" -#include "dce60/dce60_hw_sequencer.h" +#include "dce60/dce60_hwseq.h" #include "dce100/dce100_resource.h" #include "dce/dce_panel_cntl.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h rename to drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h -- GitLab From b255b6488338abee61b1264469f679e7eb96292a Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 4 Apr 2025 01:42:25 -0400 Subject: [PATCH 142/899] drm/amdgpu: fill in gmc_v6_0_set_clockgating_state() Pretty much was already there, just not ported to amdgpu. 
Tested-by: Alexandre Demers Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 44 +++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b4567d619d098..8030fcd642106 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -627,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, "write" : "read", block, mc_client, mc_id); } -/* static const u32 mc_cg_registers[] = { - MC_HUB_MISC_HUB_CG, - MC_HUB_MISC_SIP_CG, - MC_HUB_MISC_VM_CG, - MC_XPB_CLK_GAT, - ATC_MISC_CG, - MC_CITF_MISC_WR_CG, - MC_CITF_MISC_RD_CG, - MC_CITF_MISC_VM_CG, - VM_L2_CG, + mmMC_HUB_MISC_HUB_CG, + mmMC_HUB_MISC_SIP_CG, + mmMC_HUB_MISC_VM_CG, + mmMC_XPB_CLK_GAT, + mmATC_MISC_CG, + mmMC_CITF_MISC_WR_CG, + mmMC_CITF_MISC_RD_CG, + mmMC_CITF_MISC_VM_CG, + mmVM_L2_CG, }; static const u32 mc_cg_ls_en[] = { @@ -672,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= mc_cg_ls_en[i]; else data &= ~mc_cg_ls_en[i]; @@ -689,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= mc_cg_en[i]; else data &= ~mc_cg_en[i]; @@ -705,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev, orig = data = RREG32_PCIE(ixPCIE_CNTL2); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { data = REG_SET_FIELD(data, PCIE_CNTL2, 
SLV_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1); @@ -728,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_HOST_PATH_CNTL); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0); else data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1); @@ -744,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_MEM_POWER_LS); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1); else data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0); @@ -752,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, if (orig != data) WREG32(mmHDP_MEM_POWER_LS, data); } -*/ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type) { @@ -1098,6 +1096,20 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { + struct amdgpu_device *adev = ip_block->adev; + bool gate = false; + + if (state == AMD_CG_STATE_GATE) + gate = true; + + if (!(adev->flags & AMD_IS_APU)) { + gmc_v6_0_enable_mc_mgcg(adev, gate); + gmc_v6_0_enable_mc_ls(adev, gate); + } + gmc_v6_0_enable_bif_mgls(adev, gate); + gmc_v6_0_enable_hdp_mgcg(adev, gate); + gmc_v6_0_enable_hdp_ls(adev, gate); + return 0; } -- GitLab From 9cfb23021023f2e6c12fabe92bb666277843279f Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 01:52:30 -0400 Subject: [PATCH 143/899] drm/amdgpu: still cleanup sid.h The defines, shifts and masks are already available in dce_6_0_d.h, dce_6_0_sh_mask.h. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 26 +++++++++++++------------- drivers/gpu/drm/amd/amdgpu/sid.h | 16 ---------------- 2 files changed, 13 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 023fe880c9e34..e0f139de79915 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1278,24 +1278,24 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) u32 rom_cntl; bool r; - bus_cntl = RREG32(R600_BUS_CNTL); + bus_cntl = RREG32(mmBUS_CNTL); if (adev->mode_info.num_crtc) { - d1vga_control = RREG32(AVIVO_D1VGA_CONTROL); - d2vga_control = RREG32(AVIVO_D2VGA_CONTROL); + d1vga_control = RREG32(mmD1VGA_CONTROL); + d2vga_control = RREG32(mmD2VGA_CONTROL); vga_render_control = RREG32(mmVGA_RENDER_CONTROL); } rom_cntl = RREG32(R600_ROM_CNTL); /* enable the rom */ - WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS)); + WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK)); if (adev->mode_info.num_crtc) { /* Disable VGA mode */ - WREG32(AVIVO_D1VGA_CONTROL, - (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(AVIVO_D2VGA_CONTROL, - (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); + WREG32(mmD1VGA_CONTROL, + (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmD2VGA_CONTROL, + (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); WREG32(mmVGA_RENDER_CONTROL, (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK)); } @@ -1304,10 +1304,10 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) r = amdgpu_read_bios(adev); /* restore regs */ - WREG32(R600_BUS_CNTL, bus_cntl); + WREG32(mmBUS_CNTL, bus_cntl); if (adev->mode_info.num_crtc) { - WREG32(AVIVO_D1VGA_CONTROL, d1vga_control); - 
WREG32(AVIVO_D2VGA_CONTROL, d2vga_control); + WREG32(mmD1VGA_CONTROL, d1vga_control); + WREG32(mmD2VGA_CONTROL, d2vga_control); WREG32(mmVGA_RENDER_CONTROL, vga_render_control); } WREG32(R600_ROM_CNTL, rom_cntl); diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cd7a531fb3a15..cbd4f8951cfae 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -81,11 +81,6 @@ #define MC_CG_ENABLE (1 << 18) #define MC_LS_ENABLE (1 << 19) -#define MC_SHARED_CHMAP 0x801 -#define NOOFCHAN_SHIFT 12 -#define NOOFCHAN_MASK 0x0000f000 -#define MC_SHARED_CHREMAP 0x802 - #define MC_VM_FB_LOCATION 0x809 #define MC_VM_AGP_TOP 0x80A #define MC_VM_AGP_BOT 0x80B @@ -664,17 +659,6 @@ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define AVIVO_D1VGA_CONTROL 0x00cc -# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0) -# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8) -# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) -# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) -# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) -# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24) -#define AVIVO_D2VGA_CONTROL 0x00ce - -#define R600_BUS_CNTL 0x1508 -# define R600_BIOS_ROM_DIS (1 << 1) #define R600_ROM_CNTL 0x580 # define R600_SCK_OVERWRITE (1 << 1) -- GitLab From 7eb61c2dffa635e4fb05f89736d8fcf39bb24d42 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Oct 2023 12:17:41 +0200 Subject: [PATCH 144/899] drm/amdgpu: UAPI for user queue management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces new UAPI/IOCTL for usermode graphics queue. The userspace app will fill this structure and request the graphics driver to add a graphics work queue for it. The output of this UAPI is a queue id. This UAPI maps the queue into GPU, so the graphics app can start submitting work to the queue as soon as the call returns.
V2: Addressed review comments from Alex and Christian - Make the doorbell offset's comment clearer - Change the output parameter name to queue_id V3: Integration with doorbell manager V4: - Updated the UAPI doc (Pierre-Eric) - Created a Union for engine specific MQDs (Alex) - Added Christian's R-B V5: - Add variables for GDS and CSA in MQD structure (Alex) - Make MQD data a ptr-size pair instead of union (Alex) V9: - renamed struct drm_amdgpu_userq_mqd_gfx_v11 to struct drm_amdgpu_userq_mqd as it's being used for SDMA and compute queues as well V10: - keeping the drm_amdgpu_userq_mqd IP independent, moving the _gfx_v11 objects in a separate structure in other patch. (Alex) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 25d5c6e90a995..53081050cb3ea 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -54,6 +54,7 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -71,6 +72,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) /** * DOC: memory domains @@ -319,6 +321,94 @@ union
drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* Flag to indicate secure buffer related workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) +/* Flag to indicate AQL workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) + +/* + * MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. This + * structure defines the MQD for GFX-V11 IP ver 0. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue handle for USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @flags: flags to indicate special function for queue like secure + * buffer (TMZ). Unused for now. + */ + __u32 flags; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated to this client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. 
+ */ + __u64 wptr_va; + /** + * @mqd: Queue descriptor for USERQ_OP_CREATE + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + */ + __u64 mqd_size; +}; + +struct drm_amdgpu_userq_out { + /** Queue handle */ + __u32 queue_id; + /** Flags */ + __u32 flags; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- GitLab From bf33cb6551a8c5b9d68d2983678e1a05c89428c5 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 26 Aug 2024 23:04:13 +0530 Subject: [PATCH 145/899] drm/amdgpu: add usermode queue base code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds IP independent skeleton code for amdgpu usermode queue. It contains: - New files with init functions of usermode queues. - A queue context manager in driver private data. V1: Worked on design review comments from RFC patch series: (https://patchwork.freedesktop.org/series/112214/) - Alex: Keep a list of queues, instead of single queue per process. - Christian: Use the queue manager instead of global ptrs, Don't keep the queue structure in amdgpu_ctx V2: - Reformatted code, split the big patch into two V3: - Integration with doorbell manager V4: - Align the structure member names to the largest member's column (Luben) - Added SPDX license (Luben) V5: - Do not add amdgpu.h in amdgpu_userqueue.h (Christian). - Move struct amdgpu_userq_mgr into amdgpu_userqueue.h (Christian).
V6: Rebase V9: Rebase V10: Rebase + Alex's R-B Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 40 ++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 61 +++++++++++++++++++ 6 files changed, 113 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c create mode 100644 drivers/gpu/drm/amd/include/amdgpu_userqueue.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index aacc810cabb39..69b56faa2d711 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -253,6 +253,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o +# add gfx usermode queue +amdgpu-y += amdgpu_userqueue.o ifneq ($(CONFIG_HSA_AMD),) AMDKFD_PATH := ../amdkfd diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8316f93c1cce3..02511b6eb6afe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -113,6 +113,7 @@ #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" +#include "amdgpu_userqueue.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -502,6 +503,7 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + struct amdgpu_userq_mgr userq_mgr; /** GPU partition selection */ uint32_t xcp_id; }; @@ -1090,6 +1092,7 @@ struct amdgpu_device { bool enable_uni_mes; struct amdgpu_mes mes; struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; + const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; /* df */ struct amdgpu_df df; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2ca04d6aaa822..1d41e45a317ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -51,6 +51,7 @@ #include "amdgpu_reset.h" #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" +#include "amdgpu_userqueue.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 27bfe9c8af068..1fcf0ef063153 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" #include "amdgpu_reset.h" #include "amd_pcie.h" +#include "amdgpu_userqueue.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -1378,6 +1379,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); + r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev); + if (r) + DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n"); + file_priv->driver_priv = fpriv; goto out_suspend; @@ -1447,6 +1452,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); if (pasid) amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c new file mode 100644 index 0000000000000..effc0c7c02cfa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) +{ + mutex_init(&userq_mgr->userq_mutex); + idr_init_base(&userq_mgr->userq_idr, 1); + userq_mgr->adev = adev; + + return 0; +} + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) +{ + idr_destroy(&userq_mgr->userq_idr); + mutex_destroy(&userq_mgr->userq_mutex); +} diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h new file mode 100644 index 0000000000000..93ebe4b61682e --- /dev/null +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef AMDGPU_USERQUEUE_H_ +#define AMDGPU_USERQUEUE_H_ + +#define AMDGPU_MAX_USERQ_COUNT 512 + +struct amdgpu_mqd_prop; + +struct amdgpu_usermode_queue { + int queue_type; + uint64_t doorbell_handle; + uint64_t doorbell_index; + uint64_t flags; + struct amdgpu_mqd_prop *userq_prop; + struct amdgpu_userq_mgr *userq_mgr; + struct amdgpu_vm *vm; +}; + +struct amdgpu_userq_funcs { + int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args, + struct amdgpu_usermode_queue *queue); + void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *uq); +}; + +/* Usermode queues for gfx */ +struct amdgpu_userq_mgr { + struct idr userq_idr; + struct mutex userq_mutex; + struct amdgpu_device *adev; +}; + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev); + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); + +#endif -- GitLab From 5501117d24a38dadff3dbd8d3102559b27929668 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 10 Oct 2023 12:17:43 +0200 Subject: [PATCH 146/899] drm/amdgpu: add new IOCTL for usermode queue This patch adds: - A new IOCTL function to create and destroy - A new structure to keep all the user queue data in one place. - A function to generate unique index for the queue. V1: Worked on review comments from RFC patch series: - Alex: Keep a list of queues, instead of single queue per process. 
- Christian: Use the queue manager instead of global ptrs, Don't keep the queue structure in amdgpu_ctx V2: Worked on review comments: - Christian: - Formatting of text - There is no need for queuing of userqueues, with idr in place - Alex: - Remove use_doorbell, it's unnecessary - Reuse amdgpu_mqd_props for saving mqd fields - Code formatting and re-arrangement V3: - Integration with doorbell manager V4: - Accommodate MQD union related changes in UAPI (Alex) - Do not set the queue size twice (Bas) V5: - Remove wrapper functions for queue indexing (Christian) - Do not save the queue id/idr in queue itself (Christian) - Move the idr allocation in the IP independent generic space (Christian) V6: - Check the validity of input IP type (Christian) V7: - Move uq_func from uq_mgr to adev (Alex) - Add missing free(queue) for error cases (Yifan) V9: - Rebase V10: Addressed review comments from Christian, and added R-B: - Do not initialize the local variable - Convert DRM_ERROR to DEBUG. V11: - check the input flags to be zero (Alex) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 120 ++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 2 + 3 files changed, 123 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d41e45a317ed..dd2ec890d186c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2948,6 +2948,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ,
amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index effc0c7c02cfa..cf7fe68d9277e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -23,6 +23,126 @@ */ #include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_userqueue.h" + +static struct amdgpu_usermode_queue * +amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) +{ + return idr_find(&uq_mgr->userq_idr, qid); +} + +static int +amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; + struct amdgpu_usermode_queue *queue; + + mutex_lock(&uq_mgr->userq_mutex); + + queue = amdgpu_userqueue_find(uq_mgr, queue_id); + if (!queue) { + DRM_DEBUG_DRIVER("Invalid queue id to destroy\n"); + mutex_unlock(&uq_mgr->userq_mutex); + return -EINVAL; + } + + uq_funcs = adev->userq_funcs[queue->queue_type]; + uq_funcs->mqd_destroy(uq_mgr, queue); + idr_remove(&uq_mgr->userq_idr, queue_id); + kfree(queue); + + mutex_unlock(&uq_mgr->userq_mutex); + return 0; +} + +static int +amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; + struct amdgpu_usermode_queue *queue; + int qid, r = 0; + + if (args->in.flags) { + DRM_ERROR("Usermode queue flags not supported yet\n"); + return -EINVAL; + } + + mutex_lock(&uq_mgr->userq_mutex); + + uq_funcs = adev->userq_funcs[args->in.ip_type]; + if (!uq_funcs) { + DRM_ERROR("Usermode queue is not supported for this IP (%u)\n", args->in.ip_type); 
+ r = -EINVAL; + goto unlock; + } + + queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); + if (!queue) { + DRM_ERROR("Failed to allocate memory for queue\n"); + r = -ENOMEM; + goto unlock; + } + queue->doorbell_handle = args->in.doorbell_handle; + queue->doorbell_index = args->in.doorbell_offset; + queue->queue_type = args->in.ip_type; + queue->flags = args->in.flags; + queue->vm = &fpriv->vm; + + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); + if (r) { + DRM_ERROR("Failed to create Queue\n"); + kfree(queue); + goto unlock; + } + + qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); + if (qid < 0) { + DRM_ERROR("Failed to allocate a queue id\n"); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + r = -ENOMEM; + goto unlock; + } + args->out.queue_id = qid; + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); + return r; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + r = amdgpu_userqueue_create(filp, args); + if (r) + DRM_ERROR("Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: + r = amdgpu_userqueue_destroy(filp, args->in.queue_id); + if (r) + DRM_ERROR("Failed to destroy usermode queue\n"); + break; + + default: + DRM_DEBUG_DRIVER("Invalid user queue op specified: %d\n", args->in.op); + return -EINVAL; + } + + return r; +} int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 93ebe4b61682e..b739274c72e15 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -54,6 +54,8 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; }; +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + int 
amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); -- GitLab From 0385800c2ff7bddf16a9caad5840f4e4823253f2 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 10 Oct 2023 12:17:44 +0200 Subject: [PATCH 147/899] drm/amdgpu: add helpers to create userqueue object This patch introduces amdgpu_userqueue_object and its helper functions to creates and destroy this object. The helper functions creates/destroys a base amdgpu_bo, kmap/unmap it and save the respective GPU and CPU addresses in the encapsulating userqueue object. These helpers will be used to create/destroy userqueue MQD, WPTR and FW areas. V7: - Forked out this new patch from V11-gfx-userqueue patch to prevent that patch from growing very big. - Using amdgpu_bo_create instead of amdgpu_bo_create_kernel in prep for eviction fences (Christian) V9: - Rebase V10: - Added Alex's R-B Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 62 +++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 13 ++++ 2 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index cf7fe68d9277e..501324dde3431 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -32,6 +32,68 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) return idr_find(&uq_mgr->userq_idr, qid); } +int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + bp.type = ttm_bo_type_kernel; + bp.size = size; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + return r; + } + + r = amdgpu_bo_reserve(userq_obj->obj, true); + if (r) { + DRM_ERROR("Failed to reserve BO to map (%d)", r); + goto free_obj; + } + + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); + if (r) { + DRM_ERROR("Failed to alloc GART for userqueue object (%d)", r); + goto unresv; + } + + r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); + if (r) { + DRM_ERROR("Failed to map BO for userqueue (%d)", r); + goto unresv; + } + + userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); + amdgpu_bo_unreserve(userq_obj->obj); + memset(userq_obj->cpu_ptr, 0, size); + return 0; + +unresv: + amdgpu_bo_unreserve(userq_obj->obj); + +free_obj: + amdgpu_bo_unref(&userq_obj->obj); + return r; +} + +void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj) +{ + amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unref(&userq_obj->obj); +} + static int amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index b739274c72e15..bbd29f68b8d40 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -29,6 +29,12 @@ struct amdgpu_mqd_prop; +struct amdgpu_userq_obj { + void *cpu_ptr; + uint64_t gpu_addr; + struct amdgpu_bo *obj; +}; + struct amdgpu_usermode_queue { int queue_type; uint64_t doorbell_handle; @@ -37,6 +43,7 @@ struct amdgpu_usermode_queue { struct amdgpu_mqd_prop *userq_prop; struct amdgpu_userq_mgr *userq_mgr; struct amdgpu_vm *vm; + struct amdgpu_userq_obj mqd; }; struct amdgpu_userq_funcs { @@ -60,4 +67,10 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr 
*userq_mgr, struct amdgpu_devi void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); +int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size); + +void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj); #endif -- GitLab From fbf136b932358da1c65eb6fedd064a33a7a96aaa Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 26 Aug 2024 23:12:21 +0530 Subject: [PATCH 148/899] drm/amdgpu: create MES-V11 usermode queue for GFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Memory queue descriptor (MQD) of a userqueue defines it in the hw's context. As MQD format can vary between different graphics IPs, we need gfx GEN specific handlers to create MQDs. This patch: - Adds a new file which will be used for MES based userqueue functions targeting GFX and SDMA IP. - Introduces MQD handler functions for the usermode queues. V1: Worked on review comments from Alex: - Make MQD functions GEN and IP specific V2: Worked on review comments from Alex: - Reuse the existing adev->mqd[ip] for MQD creation - Formatting and arrangement of code V3: - Integration with doorbell manager V4: Review comments addressed: - Do not create a new file for userq, reuse gfx_v11_0.c (Alex) - Align name of structure members (Luben) - Don't break up the Cc tag list and the Sob tag list in commit message (Luben) V5: - No need to reserve the bo for MQD (Christian). - Some more changes to support IP specific MQD creation. V6: - Add a comment reminding us to replace the amdgpu_bo_create_kernel() calls while creating MQD object to amdgpu_bo_create() once eviction fences are ready (Christian). 
V7: - Re-arrange userqueue functions in adev instead of uq_mgr (Alex) - Use memdup_user instead of copy_from_user (Christian) V9: - Moved userqueue code from gfx_v11_0.c to new file mes_v11_0.c so that it can be reused for SDMA userqueues as well (Shashank, Alex) V10: Addressed review comments from Alex - Making this patch independent of IP engine(GFX/SDMA/Compute) and specific to MES V11 only, using the generic MQD structure. - Splitting a separate patch to enabling GFX support from here. - Verify mqd va address to be non-NULL. - Add a separate header file. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 98 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h | 30 ++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c create mode 100644 drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 69b56faa2d711..f42b9b4be9a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -174,7 +174,8 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ - mes_v12_0.o + mes_v12_0.o \ + mes_v11_0_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c new file mode 100644 index 0000000000000..63fd48a5b8b0b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "v11_structs.h" +#include "mes_v11_0.h" +#include "mes_v11_0_userqueue.h" + +static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; + struct drm_amdgpu_userq_in *mqd_user = args_in; + struct amdgpu_mqd_prop *userq_props; + int r; + + /* Structure to initialize MQD for userqueue using generic MQD init function */ + userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); + if (!userq_props) { + DRM_ERROR("Failed to allocate memory for userq_props\n"); + return -ENOMEM; + } + + if (!mqd_user->wptr_va || !mqd_user->rptr_va || + !mqd_user->queue_va || mqd_user->queue_size == 0) { + DRM_ERROR("Invalid MQD parameters for userqueue\n"); + r = -EINVAL; + goto free_props; + } + + r = amdgpu_userqueue_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); + if (r) { + DRM_ERROR("Failed to create MQD object for userqueue\n"); + goto free_props; + } + + /* Initialize the MQD BO with user given values */ + userq_props->wptr_gpu_addr = mqd_user->wptr_va; + userq_props->rptr_gpu_addr = mqd_user->rptr_va; + userq_props->queue_size = mqd_user->queue_size; + userq_props->hqd_base_gpu_addr = mqd_user->queue_va; + userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; + userq_props->use_doorbell = true; + + queue->userq_prop = userq_props; + + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); + if (r) { + DRM_ERROR("Failed to initialize MQD for userqueue\n"); + goto free_mqd; + } + + return 0; + +free_mqd: + amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + +free_props: + kfree(userq_props); + + return r; +} + +static void +mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + kfree(queue->userq_prop); + 
amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); +} + +const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { + .mqd_create = mes_v11_0_userq_mqd_create, + .mqd_destroy = mes_v11_0_userq_mqd_destroy, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h new file mode 100644 index 0000000000000..2c102361ca821 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef MES_V11_0_USERQ_H +#define MES_V11_0_USERQ_H +#include "amdgpu_userqueue.h" + +extern const struct amdgpu_userq_funcs userq_mes_v11_0_funcs; +#endif -- GitLab From defb41e8ef3a7d0421009de58d74da54ca25da76 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 13:46:06 +0200 Subject: [PATCH 149/899] drm/amdgpu: create context space for usermode queue The MES FW expects us to allocate at least one page as context space to process gang and process related context data. This patch creates a joint object for the same, and calculates GPU space offsets of these spaces. V1: Addressed review comments on RFC patch: Alex: Make this function IP specific V2: Addressed review comments from Christian - Allocate only one object for total FW space, and calculate offsets for each of these objects. V3: Integration with doorbell manager V4: Review comments: - Remove shadow from FW space list from cover letter (Alex) - Alignment of macro (Luben) V5: Merged patches 5 and 6 into this single patch Addressed review comments: - Use lower_32_bits instead of mask (Christian) - gfx_v11_0 instead of gfx_v11 in function names (Alex) - Shadow and GDS objects are now coming from userspace (Christian, Alex) V6: - Add a comment to replace amdgpu_bo_create_kernel() with amdgpu_bo_create() during fw_ctx object creation (Christian). - Move proc_ctx_gpu_addr, gang_ctx_gpu_addr and fw_ctx_gpu_addr out of generic queue structure and make it gen11 specific (Alex). V7: - Using helper function to create/destroy userqueue objects. - Removed FW object space allocation. V8: - Updating FW object address from user values. 
V9: - updated function name from gfx_v11_* to mes_v11_* V10: - making this patch independent of IP based changes, moving any GFX object related changes in GFX specific patch (Alex) Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 33 +++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 2 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 63fd48a5b8b0b..2486ea2d72fe0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -27,6 +27,31 @@ #include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE + +static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) +{ + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r, size; + + /* + * The FW expects at least one page space allocated for + * process ctx and gang ctx each. Create an object + * for the same.
+ */ + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + r = amdgpu_userqueue_create_object(uq_mgr, ctx, size); + if (r) { + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); + return r; + } + + return 0; +} + static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -73,6 +98,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* Create BO for FW operations */ + r = mes_v11_0_userq_create_ctx_space(uq_mgr, queue, mqd_user); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + goto free_mqd; + } + return 0; free_mqd: @@ -88,6 +120,7 @@ static void mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); } diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index bbd29f68b8d40..643f31474bd8f 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -44,6 +44,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_mgr *userq_mgr; struct amdgpu_vm *vm; struct amdgpu_userq_obj mqd; + struct amdgpu_userq_obj fw_obj; }; struct amdgpu_userq_funcs { -- GitLab From 6c42559f70c57590681563ebf325bd3cfd6cd13c Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 12:13:37 +0200 Subject: [PATCH 150/899] drm/amdgpu: map usermode queue into MES This patch adds new functions to map/unmap a usermode queue into the FW, using the MES ring. As soon as this mapping is done, the queue would be considered ready to accept the workload. V1: Addressed review comments from Alex on the RFC patch series - Map/Unmap should be IP specific. 
V2: Addressed review comments from Christian: - Fix the wptr_mc_addr calculation (moved into another patch) Addressed review comments from Alex: - Do not add fptrs for map/unmap V3: Integration with doorbell manager V4: Rebase V5: Use gfx_v11_0 for function names (Alex) V6: Removed queue->proc/gang/fw_ctx_address variables and doing the address calculations locally to keep the queue structure GEN independent (Alex) V7: Added R-B from Alex V8: Rebase V9: Rebase V10: Rebase Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 2486ea2d72fe0..a1bc6f4889288 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,69 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct amdgpu_mqd_prop *userq_props) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct mes_add_queue_input queue_input; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + + queue_input.process_va_start = 0; + queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + + /* set process quantum to 10 ms and gang quantum to 1 ms as default */ + queue_input.process_quantum = 100000; + queue_input.gang_quantum = 10000; + queue_input.paging = false; + + queue_input.process_context_addr = ctx->gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = 
AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + + queue_input.process_id = queue->vm->pasid; + queue_input.queue_type = queue->queue_type; + queue_input.mqd_addr = queue->mqd.gpu_addr; + queue_input.wptr_addr = userq_props->wptr_gpu_addr; + queue_input.queue_size = userq_props->queue_size >> 2; + queue_input.doorbell_offset = userq_props->doorbell_index; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); + return r; + } + + DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); + return 0; +} + +static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_remove_queue_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = queue->doorbell_index; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); +} + static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct drm_amdgpu_userq_in *mqd_user) @@ -105,8 +168,18 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* Map userqueue into FW using MES */ + r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); + if (r) { + DRM_ERROR("Failed to init MQD\n"); + goto free_ctx; + } + return 0; +free_ctx: + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); + free_mqd: amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); @@ -120,6 
+193,7 @@ static void mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { + mes_v11_0_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); -- GitLab From 5fb2f7fc21a3668e5794cc0d153641b9719713e1 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 22 Apr 2024 19:21:20 +0200 Subject: [PATCH 151/899] drm/amdgpu: map wptr BO into GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. V7: Addressed review comments from Christian: - Do not take vm->eviction_lock - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* V10: Remove unused adev (Harish) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 76 +++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 2 files changed, 77 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index a1bc6f4889288..90511abaef053 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,73 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +{ + int ret; + + ret = 
amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_v11_0_map_gtt_bo_to_gart(wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct amdgpu_mqd_prop *userq_props) @@ -61,6 +128,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; amdgpu_mes_lock(&adev->mes); r = 
adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); @@ -168,6 +236,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); if (r) { @@ -194,6 +269,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { mes_v11_0_userq_unmap(uq_mgr, queue); + amdgpu_bo_unref(&queue->wptr_obj.obj); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 643f31474bd8f..ffe8a3d737560 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -45,6 +45,7 @@ struct amdgpu_usermode_queue { struct amdgpu_vm *vm; struct amdgpu_userq_obj mqd; struct amdgpu_userq_obj fw_obj; + struct amdgpu_userq_obj wptr_obj; }; struct amdgpu_userq_funcs { -- GitLab From f09c1e6077abd1bc2ddd2b97e1135215801ca7f9 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 9 May 2024 14:17:13 +0200 Subject: [PATCH 152/899] drm/amdgpu: generate doorbell index for userqueue The userspace sends us the doorbell object and the relative doorbell index in the object to be used for the usermode queue, but the FW expects the absolute doorbell index on the PCI BAR in the MQD. This patch adds a function to convert this relative doorbell index to absolute doorbell index.
V5: Fix the db object reference leak (Christian) V6: Pin the doorbell bo in userqueue_create() function, and unpin it in userqueue destroy (Christian) V7: Added missing kfree for queue in error cases Added Alex's R-B V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* V10: Rebase V11: Rebase Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 59 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 1 + .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 3 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 501324dde3431..3c9f804478d52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -94,6 +94,53 @@ void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, amdgpu_bo_unref(&userq_obj->obj); } +static uint64_t +amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_file *filp, + uint32_t doorbell_offset) +{ + uint64_t index; + struct drm_gem_object *gobj; + struct amdgpu_userq_obj *db_obj = &queue->db_obj; + int r; + + gobj = drm_gem_object_lookup(filp, queue->doorbell_handle); + if (gobj == NULL) { + DRM_ERROR("Can't find GEM object for doorbell\n"); + return -EINVAL; + } + + db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + drm_gem_object_put(gobj); + + /* Pin the BO before generating the index, unpin in queue destroy */ + r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); + if (r) { + DRM_ERROR("[Usermode queues] Failed to pin doorbell object\n"); + goto unref_bo; + } + + r = amdgpu_bo_reserve(db_obj->obj, true); + if (r) { + DRM_ERROR("[Usermode queues] Failed to pin doorbell object\n"); + goto unpin_bo; + } + + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev,
db_obj->obj, + doorbell_offset, sizeof(u64)); + DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index); + amdgpu_bo_unreserve(db_obj->obj); + return index; + +unpin_bo: + amdgpu_bo_unpin(db_obj->obj); + +unref_bo: + amdgpu_bo_unref(&db_obj->obj); + return r; +} + static int amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) { @@ -114,6 +161,8 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) uq_funcs = adev->userq_funcs[queue->queue_type]; uq_funcs->mqd_destroy(uq_mgr, queue); + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unref(&queue->db_obj.obj); idr_remove(&uq_mgr->userq_idr, queue_id); kfree(queue); @@ -129,6 +178,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; + uint64_t index; int qid, r = 0; if (args->in.flags) { @@ -157,6 +207,15 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) queue->flags = args->in.flags; queue->vm = &fpriv->vm; + /* Convert relative doorbell offset into absolute doorbell index */ + index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, args->in.doorbell_offset); + if (index == (uint64_t)-EINVAL) { + DRM_ERROR("Failed to get doorbell for queue\n"); + kfree(queue); + goto unlock; + } + queue->doorbell_index = index; + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); if (r) { DRM_ERROR("Failed to create Queue\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 90511abaef053..bc9ce5233a7d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -220,6 +220,7 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->hqd_base_gpu_addr = mqd_user->queue_va; userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; userq_props->use_doorbell = true; + 
userq_props->doorbell_index = queue->doorbell_index; queue->userq_prop = userq_props; diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index ffe8a3d737560..a653e31350c54 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -44,6 +44,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_mgr *userq_mgr; struct amdgpu_vm *vm; struct amdgpu_userq_obj mqd; + struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; }; -- GitLab From d84607e3f7064e85a37cf38710215496ad48e7e8 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 10 Oct 2023 12:17:50 +0200 Subject: [PATCH 153/899] drm/amdgpu: cleanup leftover queues This patch adds code to cleanup any leftover userqueues which a user might have missed to destroy due to a crash or any other programming error. V7: Added Alex's R-B V8: Rebase V9: Rebase V10: Rebase V11: Rebase Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Suggested-by: Bas Nieuwenhuizen Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 3c9f804478d52..64a063ec3b27e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -26,6 +26,19 @@ #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" +static void +amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + int queue_id) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + + uq_funcs->mqd_destroy(uq_mgr, queue); + idr_remove(&uq_mgr->userq_idr, queue_id); + kfree(queue); +} + 
static struct amdgpu_usermode_queue * amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -146,8 +159,6 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) { struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; - struct amdgpu_device *adev = uq_mgr->adev; - const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; mutex_lock(&uq_mgr->userq_mutex); @@ -159,13 +170,9 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) return -EINVAL; } - uq_funcs = adev->userq_funcs[queue->queue_type]; - uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); - idr_remove(&uq_mgr->userq_idr, queue_id); - kfree(queue); - + amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); return 0; } @@ -276,6 +283,12 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { + uint32_t queue_id; + struct amdgpu_usermode_queue *queue; + + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) + amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); + idr_destroy(&userq_mgr->userq_idr); mutex_destroy(&userq_mgr->userq_mutex); } -- GitLab From a1d201e16940775f0e2f0230960d69e40879ec6d Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 12:37:30 +0200 Subject: [PATCH 154/899] drm/amdgpu: enable GFX-V11 userqueue support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables GFX-v11 IP support in the usermode queue base code. It typically: - adds a GFX_v11 specific MQD structure - sets IP functions to create and destroy MQDs - sets MQD objects coming from userspace V10: introduced this separate patch for GFX V11 enabling (Alex).
V11: Addressed review comments: - update the comments in GFX mqd structure informing user about using the INFO IOCTL for object sizes (Alex) - rename struct drm_amdgpu_userq_mqd_gfx_v11 to drm_amdgpu_userq_mqd_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++ .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 28 +++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++ 4 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 64a063ec3b27e..5cb984c509c2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -188,6 +188,12 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) uint64_t index; int qid, r = 0; + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX) { + DRM_ERROR("Usermode queue doesn't support IP type %u\n", args->in.ip_type); + return -EINVAL; + } + if (args->in.flags) { DRM_ERROR("Usermode queue flags not supported yet\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 77df4452979bd..ec487fbeaec56 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,6 +48,7 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_v11_0_userqueue.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -1613,6 +1614,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; break; case 
IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1626,6 +1628,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index bc9ce5233a7d3..bcfa0d1ef7bf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -180,6 +180,34 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return r; } + /* Shadow, GDS and CSA objects come directly from userspace */ + if (mqd_user->ip_type == AMDGPU_HW_IP_GFX) { + struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + return -EINVAL; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC; + mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va); + + mqd->gds_bkup_base_lo = mqd_gfx_v11->gds_va & 0xFFFFFFFC; + mqd->gds_bkup_base_hi = upper_32_bits(mqd_gfx_v11->gds_va); + + mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; + mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); + kfree(mqd_gfx_v11); + } + return 0; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 53081050cb3ea..4e07e15d5076c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -409,6 +409,25 @@ union drm_amdgpu_userq { struct drm_amdgpu_userq_out out; }; +/* GFX V11 IP specific MQD 
parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 gds_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- GitLab From 543b6145377458b5ec0d1440606c31db62867bf4 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 27 Aug 2024 14:52:07 +0530 Subject: [PATCH 155/899] drm/amdgpu: enable SDMA usermode queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does necessary modifications to enable the SDMA usermode queues using the existing userqueue infrastructure. 
V9: introduced this patch in the series V10: use header file instead of extern (Alex) V11: rename drm_amdgpu_userq_mqd_sdma_gfx_v11 to drm_amdgpu_userq_mqd_sdma_gfx11 (Marek) Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 5cb984c509c2f..2c5747cc492e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -189,7 +189,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) int qid, r = 0; /* Usermode queues are only supported for GFX IP as of now */ - if (args->in.ip_type != AMDGPU_HW_IP_GFX) { + if (args->in.ip_type != AMDGPU_HW_IP_GFX && args->in.ip_type != AMDGPU_HW_IP_DMA) { DRM_ERROR("Usermode queue doesn't support IP type %u\n", args->in.ip_type); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index bcfa0d1ef7bf2..dc53597427746 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -206,6 +206,24 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); kfree(mqd_gfx_v11); + } else if (mqd_user->ip_type == AMDGPU_HW_IP_DMA) { + struct v11_sdma_mqd *mqd = queue->mqd.cpu_ptr; + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != 
sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + return -EINVAL; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + mqd->sdmax_rlcx_csa_addr_lo = mqd_sdma_v11->csa_va & 0xFFFFFFFC; + mqd->sdmax_rlcx_csa_addr_hi = upper_32_bits(mqd_sdma_v11->csa_va); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c214c3d2149bd..f163b253c2003 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,6 +43,7 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" +#include "mes_v11_0_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -1376,6 +1377,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; + r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e07e15d5076c..6ae9885740842 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -428,6 +428,16 @@ struct drm_amdgpu_userq_mqd_gfx11 { __u64 csa_va; }; +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. 
+ */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- GitLab From 2c695d7c072067cd53fb52e52aa0b48277120314 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 9 May 2024 14:31:15 +0200 Subject: [PATCH 156/899] drm/amdgpu: enable compute/gfx usermode queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does the necessary changes required to enable compute workload support using the existing usermode queues infrastructure. V9: Patch introduced V10: Add custom IP specific mqd structure for compute (Alex) V11: Rename drm_amdgpu_userq_mqd_compute_gfx_v11 to drm_amdgpu_userq_mqd_compute_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 23 +++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 10 ++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2c5747cc492e4..5173718c38483 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -189,7 +189,9 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) int qid, r = 0; /* Usermode queues are only supported for GFX IP as of now */ - if (args->in.ip_type != AMDGPU_HW_IP_GFX && args->in.ip_type != AMDGPU_HW_IP_DMA) { + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { DRM_ERROR("Usermode queue doesn't support IP type %u\n", args->in.ip_type); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index
ec487fbeaec56..ed57c3e4c0c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1615,6 +1615,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -1629,6 +1630,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index dc53597427746..e70b8e429e9c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -268,6 +268,29 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->use_doorbell = true; userq_props->doorbell_index = queue->doorbell_index; + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + kfree(compute_mqd); + } + queue->userq_prop = userq_props; r = 
mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6ae9885740842..59f0818e8dcda 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -438,6 +438,16 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { __u64 csa_va; }; +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and must be at least 1 page + * sized. + */ + __u64 eop_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- GitLab From 9d3afcb7b9f950b9b7c58ceeeb9e71f3476e69ed Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 27 Aug 2024 15:29:49 +0530 Subject: [PATCH 157/899] drm/amdgpu: fix MES GFX mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current MES GFX mask prevents FW to enable oversubscription. This patch does the following: - Fixes the mask values and adds a description for the same - Removes the central mask setup and makes it IP specific, as it would be different when the number of pipes and queues are different. 
v2: squash in fix from Shashank Cc: Christian König Cc: Alex Deucher Acked-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 15 +++++++++++++-- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 15 ++++++++++++--- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 85f774063f9b1..f8202e6ed4b15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -150,9 +150,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.compute_hqd_mask[i] = 0xc; } - for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) - adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; - for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { if (i >= adev->sdma.num_instances) break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index da2c9a8cb3e01..52dd54a32fb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -111,8 +111,8 @@ struct amdgpu_mes { uint32_t vmid_mask_gfxhub; uint32_t vmid_mask_mmhub; - uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; + uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c34a00500e183..4cfd86aa2ea3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -669,6 +669,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, offsetof(union MESAPI__MISC, api_status)); } +static void 
mes_v11_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) +{ + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + * GFX pipe 1 can't be used for MES due to HW limitation. + */ + pkt->gfx_hqd_mask[0] = 0x2; + pkt->gfx_hqd_mask[1] = 0; +} + static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) { int i; @@ -693,8 +705,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + mes_v11_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e6ab617b9a404..62aba0b5dbe2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -694,6 +694,17 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } +static void mes_v12_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) +{ + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. 
+ * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + pkt->gfx_hqd_mask[0] = 0xFE; +} + static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; @@ -716,9 +727,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = - mes->gfx_hqd_mask[i]; + mes_v12_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = -- GitLab From f540f69256a3a05973e87bcdd25881a9fe731b61 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 27 Aug 2024 14:55:35 +0530 Subject: [PATCH 158/899] drm/amdgpu: add kernel config for gfx-userqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch: - adds a kernel config option "CONFIG_DRM_AMDGPU_NAVI3X_USERQ" - moves the userqueue initialization code for all IPs under this flag - covers the core userqueue functions under this config - adds stub function for userqueue ioctl. so that the userqueue works only when the config is enabled. V9: Introduce this patch V10: Call it CONFIG_DRM_AMDGPU_NAVI3X_USERQ instead of CONFIG_DRM_AMDGPU_USERQ_GFX (Christian) V11: Add GFX in the config help description message. V12: Add depends on BROKEN for this config, remove this when the rest of the code is available.
Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/Makefile | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1a11cab741aca..5bb44ea80164a 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -96,6 +96,15 @@ config DRM_AMDGPU_WERROR Add -Werror to the build flags for amdgpu.ko. Only enable this if you are warning code for amdgpu.ko. +config DRM_AMDGPU_NAVI3X_USERQ + bool "Enable Navi 3x gfx usermode queues" + depends on DRM_AMDGPU + depends on BROKEN + default n + help + Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute + workload submission. This feature is experimental and supported on Navi 3X only. 
+ source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f42b9b4be9a47..376e8f5396aed 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -175,7 +175,9 @@ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ mes_v12_0.o \ - mes_v11_0_userqueue.o + +# add GFX userqueue support +amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 5173718c38483..21e805530a6ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -39,6 +39,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static struct amdgpu_usermode_queue * amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -279,6 +280,13 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, return r; } +#else +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + return 0; +} +#endif int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ed57c3e4c0c49..62a6da083a8fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1614,8 +1614,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; +#endif break; case IP_VERSION(11, 0, 1): case 
IP_VERSION(11, 0, 4): @@ -1629,8 +1631,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMD_USERQ_GFX adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; +#endif break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index f163b253c2003..d4e1b2cd1f359 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,8 +1377,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; - +#endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; -- GitLab From 97ff1946253971376665ad6d90f8fb23b1288025 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 25 Oct 2024 15:41:53 +0530 Subject: [PATCH 159/899] drm/amdgpu: Implement a new userqueue fence driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Developed a userqueue fence driver for the userqueue process shared BO synchronization. Create a dma fence having write pointer as the seqno and allocate a seq64 memory for each user queue process and feed this memory address into the firmware/hardware, thus the firmware writes the read pointer into the given address when the process completes its execution. Compare wptr and rptr, if rptr >= wptr, signal the fences for the waiting process to consume the buffers. v2: Worked on review comments from Christian for the following modifications - Add wptr as sequence number into the fence - Add a reference count for the fence driver - Add dma_fence_put below the list_del as it might free the userq fence.
- Trim unnecessary code in interrupt handler. - Check dma fence signaled state in dma fence creation function for a potential problem of hardware completing the job processing beforehand. - Add necessary locks. - Create a list and process all the unsignaled fences. - clean up fences in destroy function. - implement .signaled callback function v3: Worked on review comments from Christian - Modify naming convention for reference counted objects - Fix fence driver reference drop issue - Drop amdgpu_userq_fence_driver_process() function return value v4: Worked on review comments from Christian - Moved fence driver allocation into amdgpu_userq_fence_driver_alloc() - Added detail doc mentioning the differences b/w two spinlocks declared. v5: Worked on review comments from Christian - Check before upcast and remove local variable - Add error handling in fence_drv alloc function. - Move rptr read fn outside of the loop and remove WARN_ON in destroy function. v6: - clear the seq64 memory in user fence driver(Christian) - fix for the wptr va bo mapping(Christian) - move the fence_drv xa entry erase code from the interrupt handler into user fence destroy function Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Suggested-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 + .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 257 ++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 69 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 8 + .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 6 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 376e8f5396aed..1cc3079040897 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ 
b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index dd2ec890d186c..0221e79289894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -52,6 +52,7 @@ #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" #include "amdgpu_userqueue.h" +#include "amdgpu_userq_fence.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -3039,6 +3040,10 @@ static int __init amdgpu_init(void) if (r) goto error_fence; + r = amdgpu_userq_fence_slab_init(); + if (r) + goto error_fence; + DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3070,6 +3075,7 @@ static void __exit amdgpu_exit(void) amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c new file mode 100644 index 0000000000000..f7baea2c67abe --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include +#include + +#include + +#include "amdgpu.h" +#include "amdgpu_userq_fence.h" + +static const struct dma_fence_ops amdgpu_userq_fence_ops; +static struct kmem_cache *amdgpu_userq_fence_slab; + +int amdgpu_userq_fence_slab_init(void) +{ + amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", + sizeof(struct amdgpu_userq_fence), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!amdgpu_userq_fence_slab) + return -ENOMEM; + + return 0; +} + +void amdgpu_userq_fence_slab_fini(void) +{ + rcu_barrier(); + kmem_cache_destroy(amdgpu_userq_fence_slab); +} + +static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) +{ + if (!f || f->ops != &amdgpu_userq_fence_ops) + return NULL; + + return container_of(f, struct amdgpu_userq_fence, base); +} + +static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) +{ + return le64_to_cpu(*fence_drv->cpu_addr); +} + +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq) +{ + struct amdgpu_userq_fence_driver *fence_drv; + int r; + + fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); + if (!fence_drv) { + DRM_ERROR("Failed to allocate memory for fence driver\n"); + return -ENOMEM; + } + + /* Acquire seq64 memory */ + r = amdgpu_seq64_alloc(adev, &fence_drv->gpu_addr, + &fence_drv->cpu_addr); + if (r) { + kfree(fence_drv); + return -ENOMEM; + } + + memset(fence_drv->cpu_addr, 0, sizeof(u64)); + + kref_init(&fence_drv->refcount); + INIT_LIST_HEAD(&fence_drv->fences); + spin_lock_init(&fence_drv->fence_list_lock); + + fence_drv->adev = adev; + fence_drv->context = dma_fence_context_alloc(1); + get_task_comm(fence_drv->timeline_name, current); + + userq->fence_drv = fence_drv; + + return 0; +} + +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +{ + struct amdgpu_userq_fence *userq_fence, *tmp; + struct dma_fence *fence; + u64 rptr; + + if (!fence_drv) + return; + + rptr = 
amdgpu_userq_fence_read(fence_drv); + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { + fence = &userq_fence->base; + + if (rptr >= fence->seqno) { + dma_fence_signal(fence); + list_del(&userq_fence->link); + + dma_fence_put(fence); + } else { + break; + } + } + spin_unlock(&fence_drv->fence_list_lock); +} + +void amdgpu_userq_fence_driver_destroy(struct kref *ref) +{ + struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, + struct amdgpu_userq_fence_driver, + refcount); + struct amdgpu_device *adev = fence_drv->adev; + struct amdgpu_userq_fence *fence, *tmp; + struct dma_fence *f; + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { + f = &fence->base; + + if (!dma_fence_is_signaled(f)) { + dma_fence_set_error(f, -ECANCELED); + dma_fence_signal(f); + } + + list_del(&fence->link); + dma_fence_put(f); + } + spin_unlock(&fence_drv->fence_list_lock); + + /* Free seq64 memory */ + amdgpu_seq64_free(adev, fence_drv->gpu_addr); + kfree(fence_drv); +} + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_get(&fence_drv->refcount); +} + +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); +} + +int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + u64 seq, struct dma_fence **f) +{ + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + struct dma_fence *fence; + + fence_drv = userq->fence_drv; + if (!fence_drv) + return -EINVAL; + + userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + if (!userq_fence) + return -ENOMEM; + + spin_lock_init(&userq_fence->lock); + INIT_LIST_HEAD(&userq_fence->link); + fence = &userq_fence->base; + userq_fence->fence_drv = fence_drv; + + dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + 
fence_drv->context, seq); + + amdgpu_userq_fence_driver_get(fence_drv); + dma_fence_get(fence); + + spin_lock(&fence_drv->fence_list_lock); + /* Check if hardware has already processed the job */ + if (!dma_fence_is_signaled(fence)) + list_add_tail(&userq_fence->link, &fence_drv->fences); + else + dma_fence_put(fence); + + spin_unlock(&fence_drv->fence_list_lock); + + *f = fence; + + return 0; +} + +static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_userqueue_fence"; +} + +static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + + return fence->fence_drv->timeline_name; +} + +static bool amdgpu_userq_fence_signaled(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 rptr, wptr; + + rptr = amdgpu_userq_fence_read(fence_drv); + wptr = fence->base.seqno; + + if (rptr >= wptr) + return true; + + return false; +} + +static void amdgpu_userq_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); + struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); + struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; + + /* Release the fence driver reference */ + amdgpu_userq_fence_driver_put(fence_drv); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); +} + +static void amdgpu_userq_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_userq_fence_free); +} + +static const struct dma_fence_ops amdgpu_userq_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_userq_fence_get_driver_name, + .get_timeline_name = amdgpu_userq_fence_get_timeline_name, + .signaled = amdgpu_userq_fence_signaled, + .release = amdgpu_userq_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h new file mode 100644 index 0000000000000..c3e04cdbb9e72 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_USERQ_FENCE_H__ +#define __AMDGPU_USERQ_FENCE_H__ + +#include + +#include "amdgpu_userqueue.h" + +struct amdgpu_userq_fence { + struct dma_fence base; + /* + * This lock is necessary to synchronize the + * userqueue dma fence operations. + */ + spinlock_t lock; + struct list_head link; + struct amdgpu_userq_fence_driver *fence_drv; +}; + +struct amdgpu_userq_fence_driver { + struct kref refcount; + u64 gpu_addr; + u64 *cpu_addr; + u64 context; + /* + * This lock is necesaary to synchronize the access + * to the fences list by the fence driver. 
+ */ + spinlock_t fence_list_lock; + struct list_head fences; + struct amdgpu_device *adev; + char timeline_name[TASK_COMM_LEN]; +}; + +int amdgpu_userq_fence_slab_init(void); +void amdgpu_userq_fence_slab_fini(void); +int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + u64 seq, struct dma_fence **f); +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_destroy(struct kref *ref); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 21e805530a6ac..bcd86c1b3a757 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" +#include "amdgpu_userq_fence.h" static void amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, @@ -35,6 +36,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; uq_funcs->mqd_destroy(uq_mgr, queue); + amdgpu_userq_fence_driver_put(queue->fence_drv); idr_remove(&uq_mgr->userq_idr, queue_id); kfree(queue); } @@ -232,6 +234,12 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; + r = amdgpu_userq_fence_driver_alloc(adev, queue); + if (r) { + DRM_ERROR("Failed to alloc fence driver\n"); + goto unlock; + } + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); if (r) { DRM_ERROR("Failed to create Queue\n"); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 
a653e31350c54..76a59c350c2a2 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -47,6 +47,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + struct amdgpu_userq_fence_driver *fence_drv; }; struct amdgpu_userq_funcs { -- GitLab From 8493312a94f0cc29be09c200d3a934873ea33b29 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 25 Oct 2024 15:44:02 +0530 Subject: [PATCH 160/899] drm/amdgpu: Add mqd support for the fence address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a field in struct v11_gfx_mqd for userqueue fence address. - Assign fence gpu VA address to the userqueue mqd fence address fields. v2: Remove the mask and replace with lower_32_bits (Christian) Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 11 +++++++++++ drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index e70b8e429e9c0..b3aa49ff1a872 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -26,6 +26,7 @@ #include "v11_structs.h" #include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" +#include "amdgpu_userq_fence.h" #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -229,6 +230,14 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } +static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) +{ + struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; + + mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); + mqd->fenceaddress_hi = 
upper_32_bits(queue->fence_drv->gpu_addr); +} + static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -306,6 +315,8 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + mes_v11_0_userq_set_fence_space(queue); + /* FW expects WPTR BOs to be mapped into GART */ r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index f8008270f8131..797ce6a1e56eb 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fenceaddress_lo; // offset: 510 (0x1FE) + uint32_t fenceaddress_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { -- GitLab From 2e65ea1ab2f6f3731aba373917d509216701cbe6 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 25 Oct 2024 16:15:02 +0530 Subject: [PATCH 161/899] drm/amdgpu: screen freeze and userq driver crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Screen freeze and userq fence driver crash while playing Xonotic v2: (Christian) - There is a chance that the fence might signal in between testing and grabbing the lock. Hence we can move the lock above the if..else check and use the dma_fence_is_signaled_locked().
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index f7baea2c67abe..3693453d8b601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -171,6 +171,7 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, struct amdgpu_userq_fence_driver *fence_drv; struct amdgpu_userq_fence *userq_fence; struct dma_fence *fence; + unsigned long flags; fence_drv = userq->fence_drv; if (!fence_drv) @@ -191,14 +192,14 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, amdgpu_userq_fence_driver_get(fence_drv); dma_fence_get(fence); - spin_lock(&fence_drv->fence_list_lock); /* Check if hardware has already processed the job */ - if (!dma_fence_is_signaled(fence)) + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + if (!dma_fence_is_signaled_locked(fence)) list_add_tail(&userq_fence->link, &fence_drv->fences); else dma_fence_put(fence); - spin_unlock(&fence_drv->fence_list_lock); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); *f = fence; -- GitLab From 6b0c7c367317a663a58f72f79e73ad787aac873d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:09:50 +0530 Subject: [PATCH 162/899] drm/amdgpu: UAPI headers for userqueue Secure semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UAPI header support for userqueue Secure semaphore v2: Worked on review comments from Christian for the following modifications - Add bo handles, bo flags and padding fields. - Include value/va in a combined array. 
v3: Worked on review comments from Christian - Add num_fences field to obtain the number of objects required to allocate memory for userq_fence_info. - Replace obj_handle name with syncobj_handle. - Replace point name with syncobj_point. - Replace count_handles name with num_syncobj_handles. - Fix structure padding related issues. v4: Worked on review comments from Christian - Modify the bo flags description. Signed-off-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 59f0818e8dcda..4e9414c0f9241 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -55,6 +55,8 @@ extern "C" { #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 #define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -73,6 +75,8 @@ extern "C" { #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) /** * DOC: memory domains @@ -448,6 +452,117 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; +/* dma_resv usage 
flag */ +#define AMDGPU_USERQ_BO_WRITE 1 + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + /** + * @flags: flags to indicate special function for userq fence creation. + * Unused for now. + */ + __u32 flags; + /** + * @syncobj_handles_array: An array of syncobj handles used by the userq fence + * creation IOCTL to install the created dma_fence object which can be + * utilized by userspace to explicitly synchronize GPU commands. + */ + __u64 syncobj_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u64 num_syncobj_handles; + /** + * @syncobj_point: A given point on the timeline to be signaled. + * Unused for now. + */ + __u64 syncobj_point; + /** + * @bo_handles_array: An array of GEM BO handles used by the userq fence creation + * IOCTL to install the created dma_fence object which can be utilized by + * userspace to synchronize the BO usage between user processes. + */ + __u64 bo_handles_array; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @bo_flags: flags to indicate BOs synchronize for READ or WRITE + */ + __u32 bo_flags; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used to retrieve the queue information to store + * the fence driver references in the wait user queue structure. 
+ */ + __u32 waitq_id; + /** + * @flags: flags to specify special function for userq wait information. + * Unused for now. + */ + __u32 flags; + /** + * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the + * matching fence wait info pair in @userq_fence_info. + */ + __u32 bo_wait_flags; + __u32 pad; + /** + * @syncobj_handles_array: An array of syncobj handles defined to get the + * fence wait information of every syncobj handles in the array. + */ + __u64 syncobj_handles_array; + /** + * @bo_handles_array: An array of GEM BO handles defined to fetch the fence + * wait information of every BO handles in the array. + */ + __u64 bo_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @userq_fence_info: An array of fence information (va and value) pair of each + * objects stored in @syncobj_handles_array and @bo_handles_array. + */ + __u64 userq_fence_info; + /** + * @num_fences: A count that represents the number of actual fences installed in + * each syncobj and bo handles. + */ + __u64 num_fences; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- GitLab From a292fdecd72834b3bec380baa5db1e69e7f70679 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:26:07 +0530 Subject: [PATCH 163/899] drm/amdgpu: Implement userqueue signal/wait IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces new IOCTL for userqueue secure semaphore. The signal IOCTL called from userspace application creates a drm syncobj and array of bo GEM handles and passed in as parameter to the driver to install the fence into it. 
The wait IOCTL gets an array of drm syncobjs, finds the fences attached to the drm syncobjs and obtains the array of memory_address/fence_value combinations which are returned to userspace. v2: (Christian) - Install fence into GEM BO object. - Lock all BO's using the dma resv subsystem - Reorder the sequence in signal IOCTL function. - Get write pointer from the shadow wptr - use userq_fence to fetch the va/value in wait IOCTL. v3: (Christian) - Use drm_exec helper for the proper BO drm reserve and avoid BO lock/unlock issues. - fence/fence driver reference count logic for signal/wait IOCTLs. v4: (Christian) - Fixed the drm_exec calling sequence - use dma_resv_for_each_fence_unlocked if BO's are not locked - Modified the fence_info array storing logic. v5: (Christian) - Keep fence_drv until wait queue execution. - Add dma_fence_wait for other fences. - Lock BO's using drm_exec as the number of fences in them could change. - Install signaled fences as well into BO/Syncobj. - Move Syncobj fence installation code after the drm_exec_prepare_array. - Directly add dma_resv_usage_rw(args->bo_flags.... - remove unnecessary dma_fence_put. v6: (Christian) - Add xarray stuff to store the fence_drv - Implement a function to iterate over the xarray and drop the fence_drv references. - Add drm_exec_until_all_locked() wrapper - Add a check that if we haven't exceeded the user allocated num_fences before adding dma_fence to the fences array. v7: (Christian) - Use memdup_user() for kmalloc_array + copy_from_user - Move the fence_drv references from the xarray into the newly created fence and drop the fence_drv references when we signal this fence. - Move this locking of BOs before the "if (!wait_info->num_fences)", this way you need this code block only once. - Merge the error handling code and the cleanup + return 0 code. - Initializing the xa should probably be done in the userq code. - Remove the userq back pointer stored in fence_drv.
- Pass xarray as parameter in amdgpu_userq_walk_and_drop_fence_drv() v8: (Christian) - Move fence_drv references must come before adding the fence to the list. - Use xa_lock_irqsave_nested for nested spinlock operations. - userq_mgr should be per fpriv and not one per device. - Restructure the interrupt process code for the early exit of the loop. - The reference acquired in the syncobj fence replace code needs to be kept around. - Modify the dma_fence acquire placement in wait IOCTL. - Move USERQ_BO_WRITE flag to UAPI header file. - drop the fence drv reference after telling the hw to stop accessing it. - Add multi sync object support to userq signal IOCTL. V9: (Christian) - Store all the fence_drv ref to other drivers and not ourself. - Remove the userq fence xa implementation and replace with kvmalloc_array. v10: (Christian) - Add a comment for the userq_xa xarray - drop the if check of userq_fence->fence_drv_array - use the i variable to initialize userq_fence->fence_drv_array_count - drop the fence reference before you free the array in the error handling, otherwise it could be that some references leaked. 
Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 + .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 446 +++++++++++++++++- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 7 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 29 +- .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 6 files changed, 483 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02511b6eb6afe..be10fbe293e4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1094,6 +1094,12 @@ struct amdgpu_device { struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; + /* xarray used to retrieve the user queue fence driver reference + * in the EOP interrupt handler to signal the particular user + * queue fence. 
+ */ + struct xarray userq_xa; + /* df */ struct amdgpu_df df; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0221e79289894..148f859c3b989 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2950,6 +2950,8 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 3693453d8b601..8f9d2427d3800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -25,6 +25,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -92,6 +93,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, spin_lock_init(&fence_drv->fence_list_lock); fence_drv->adev = adev; + fence_drv->uq_fence_drv_xa_ref = &userq->uq_fence_drv_xa; fence_drv->context = dma_fence_context_alloc(1); get_task_comm(fence_drv->timeline_name, current); @@ -105,6 +107,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d struct amdgpu_userq_fence *userq_fence, *tmp; struct dma_fence *fence; u64 rptr; + int i; if (!fence_drv) return; @@ -115,14 +118,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { fence = &userq_fence->base; - if (rptr >= fence->seqno) { - dma_fence_signal(fence); - 
list_del(&userq_fence->link); - - dma_fence_put(fence); - } else { + if (rptr < fence->seqno) break; - } + + dma_fence_signal(fence); + + for (i = 0; i < userq_fence->fence_drv_array_count; i++) + amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + + list_del(&userq_fence->link); + dma_fence_put(fence); } spin_unlock(&fence_drv->fence_list_lock); } @@ -132,8 +137,11 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, struct amdgpu_userq_fence_driver, refcount); + struct amdgpu_userq_fence_driver *xa_fence_drv; struct amdgpu_device *adev = fence_drv->adev; struct amdgpu_userq_fence *fence, *tmp; + struct xarray *xa = &adev->userq_xa; + unsigned long index; struct dma_fence *f; spin_lock(&fence_drv->fence_list_lock); @@ -150,6 +158,12 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) } spin_unlock(&fence_drv->fence_list_lock); + xa_lock(xa); + xa_for_each(xa, index, xa_fence_drv) + if (xa_fence_drv == fence_drv) + __xa_erase(xa, index); + xa_unlock(xa); + /* Free seq64 memory */ amdgpu_seq64_free(adev, fence_drv->gpu_addr); kfree(fence_drv); @@ -192,6 +206,33 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, amdgpu_userq_fence_driver_get(fence_drv); dma_fence_get(fence); + if (!xa_empty(&userq->uq_fence_drv_xa)) { + struct amdgpu_userq_fence_driver *stored_fence_drv; + unsigned long index, count = 0; + int i = 0; + + xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) + count++; + + userq_fence->fence_drv_array = + kvmalloc_array(count, + sizeof(struct amdgpu_userq_fence_driver *), + GFP_KERNEL); + + if (userq_fence->fence_drv_array) { + xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) { + userq_fence->fence_drv_array[i] = stored_fence_drv; + xa_erase(&userq->uq_fence_drv_xa, index); + i++; + } + } + + userq_fence->fence_drv_array_count = i; + } else { + userq_fence->fence_drv_array = NULL; + userq_fence->fence_drv_array_count = 
0; + } + /* Check if hardware has already processed the job */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); if (!dma_fence_is_signaled_locked(fence)) @@ -241,6 +282,8 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) /* Release the fence driver reference */ amdgpu_userq_fence_driver_put(fence_drv); + + kvfree(userq_fence->fence_drv_array); kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); } @@ -256,3 +299,392 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { .signaled = amdgpu_userq_fence_signaled, .release = amdgpu_userq_fence_release, }; + +/** + * amdgpu_userq_fence_read_wptr - Read the userq wptr value + * + * @filp: drm file private data structure + * @queue: user mode queue structure pointer + * @wptr: write pointer value + * + * Read the wptr value from userq's MQD. The userq signal IOCTL + * creates a dma_fence for the shared buffers that expects the + * RPTR value written to seq64 memory >= WPTR. + * + * Returns wptr value on success, error on failure. 
+ */ +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp, + struct amdgpu_usermode_queue *queue, + u64 *wptr) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo *bo; + u64 addr, *ptr; + int r; + + addr = queue->userq_prop->wptr_gpu_addr; + addr &= AMDGPU_GMC_HOLE_MASK; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr >> PAGE_SHIFT); + if (!mapping) + return -EINVAL; + + bo = mapping->bo_va->base.bo; + r = amdgpu_bo_reserve(bo, true); + if (r) { + DRM_ERROR("Failed to reserve userqueue wptr bo"); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&ptr); + if (r) { + DRM_ERROR("Failed mapping the userqueue wptr bo"); + goto map_error; + } + + *wptr = le64_to_cpu(*ptr); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return 0; + +map_error: + amdgpu_bo_unreserve(bo); + return r; +} + +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_amdgpu_userq_signal *args = data; + struct amdgpu_usermode_queue *queue; + struct drm_gem_object **gobj = NULL; + struct drm_syncobj **syncobj = NULL; + u32 *syncobj_handles, num_syncobj_handles; + u32 *bo_handles, num_bo_handles; + int r, i, entry, boentry; + struct dma_fence *fence; + struct drm_exec exec; + u64 wptr; + + /* Array of syncobj handles */ + num_syncobj_handles = args->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles_array), + sizeof(u32) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + /* Array of pointers to the looked up syncobjs */ + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + if (!syncobj) { + r = -ENOMEM; + goto free_syncobj_handles; + } + + for (entry = 0; entry < num_syncobj_handles; entry++) { + syncobj[entry] = 
drm_syncobj_find(filp, syncobj_handles[entry]); + if (!syncobj[entry]) { + r = -ENOENT; + goto free_syncobj; + } + } + + /* Array of bo handles */ + num_bo_handles = args->num_bo_handles; + bo_handles = memdup_user(u64_to_user_ptr(args->bo_handles_array), + sizeof(u32) * num_bo_handles); + if (IS_ERR(bo_handles)) + goto free_syncobj; + + /* Array of pointers to the GEM objects */ + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); + if (!gobj) { + r = -ENOMEM; + goto free_bo_handles; + } + + for (boentry = 0; boentry < num_bo_handles; boentry++) { + gobj[boentry] = drm_gem_object_lookup(filp, bo_handles[boentry]); + if (!gobj[boentry]) { + r = -ENOENT; + goto put_gobj; + } + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) + goto exec_fini; + } + + /*Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + goto exec_fini; + } + + r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr); + if (r) + goto exec_fini; + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, wptr, &fence); + if (r) + goto exec_fini; + + for (i = 0; i < num_bo_handles; i++) + dma_resv_add_fence(gobj[i]->resv, fence, + dma_resv_usage_rw(args->bo_flags & + AMDGPU_USERQ_BO_WRITE)); + + /* Add the created fence to syncobj/BO's */ + for (i = 0; i < num_syncobj_handles; i++) + drm_syncobj_replace_fence(syncobj[i], fence); + + /* drop the reference acquired in fence creation function */ + dma_fence_put(fence); + +exec_fini: + drm_exec_fini(&exec); +put_gobj: + while (boentry-- > 0) + drm_gem_object_put(gobj[boentry]); + kfree(gobj); +free_bo_handles: + kfree(bo_handles); +free_syncobj: + while (entry-- > 0) + if (syncobj[entry]) + drm_syncobj_put(syncobj[entry]); + kfree(syncobj); +free_syncobj_handles: + kfree(syncobj_handles); + + return r; +} + +int 
amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_amdgpu_userq_fence_info *fence_info = NULL; + struct drm_amdgpu_userq_wait *wait_info = data; + u32 *syncobj_handles, *bo_handles; + struct dma_fence **fences = NULL; + u32 num_syncobj, num_bo_handles; + struct drm_gem_object **gobj; + struct drm_exec exec; + int r, i, entry, cnt; + u64 num_fences = 0; + + num_bo_handles = wait_info->num_bo_handles; + bo_handles = memdup_user(u64_to_user_ptr(wait_info->bo_handles_array), + sizeof(u32) * num_bo_handles); + if (IS_ERR(bo_handles)) + return PTR_ERR(bo_handles); + + num_syncobj = wait_info->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array), + sizeof(u32) * num_syncobj); + if (IS_ERR(syncobj_handles)) { + r = PTR_ERR(syncobj_handles); + goto free_bo_handles; + } + + /* Array of GEM object handles */ + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); + if (!gobj) { + r = -ENOMEM; + goto free_syncobj_handles; + } + + for (entry = 0; entry < num_bo_handles; entry++) { + gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]); + if (!gobj[entry]) { + r = -ENOENT; + goto put_gobj; + } + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj; + } + } + + if (!wait_info->num_fences) { + /* Count syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, 0, &fence); + if (r) + goto exec_fini; + + num_fences++; + dma_fence_put(fence); + } + + /* Count GEM objects fence */ + for (i = 0; i < num_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv, + dma_resv_usage_rw(wait_info->bo_wait_flags & + 
AMDGPU_USERQ_BO_WRITE), fence) + num_fences++; + } + + /* + * Passing num_fences = 0 means that userspace doesn't want to + * retrieve userq_fence_info. If num_fences = 0 we skip filling + * userq_fence_info and return the actual number of fences on + * args->num_fences. + */ + wait_info->num_fences = num_fences; + } else { + /* Array of fence info */ + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); + if (!fence_info) { + r = -ENOMEM; + goto exec_fini; + } + + /* Array of fences */ + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) { + r = -ENOMEM; + goto free_fence_info; + } + + /* Retrieve GEM objects fence */ + for (i = 0; i < num_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv, + dma_resv_usage_rw(wait_info->bo_wait_flags & + AMDGPU_USERQ_BO_WRITE), fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + /* Retrieve syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, 0, &fence); + if (r) + goto free_fences; + + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + } + + for (i = 0, cnt = 0; i < wait_info->num_fences; i++) { + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + u32 index; + + userq_fence = to_amdgpu_userq_fence(fences[i]); + if (!userq_fence) { + /* + * Just waiting on other driver fences should + * be good for now + */ + dma_fence_wait(fences[i], false); + dma_fence_put(fences[i]); + + continue; + } + + fence_drv = userq_fence->fence_drv; + /* + * We need to make sure the user queue release their reference + * to the fence drivers at some point before 
queue destruction. + * Otherwise, we would gather those references until we don't + * have any more space left and crash. + */ + if (fence_drv->uq_fence_drv_xa_ref) { + r = xa_alloc(fence_drv->uq_fence_drv_xa_ref, &index, fence_drv, + xa_limit_32b, GFP_KERNEL); + if (r) + goto free_fences; + + amdgpu_userq_fence_driver_get(fence_drv); + } + + /* Store drm syncobj's gpu va address and value */ + fence_info[cnt].va = fence_drv->gpu_addr; + fence_info[cnt].value = fences[i]->seqno; + + dma_fence_put(fences[i]); + /* Increment the actual userq fence count */ + cnt++; + } + + wait_info->num_fences = cnt; + /* Copy userq fence info to user space */ + if (copy_to_user(u64_to_user_ptr(wait_info->userq_fence_info), + fence_info, wait_info->num_fences * sizeof(*fence_info))) { + r = -EFAULT; + goto free_fences; + } + + kfree(fences); + kfree(fence_info); + } + + drm_exec_fini(&exec); + for (i = 0; i < num_bo_handles; i++) + drm_gem_object_put(gobj[i]); + kfree(gobj); + + kfree(syncobj_handles); + kfree(bo_handles); + + return 0; + +free_fences: + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); +free_fence_info: + kfree(fence_info); +exec_fini: + drm_exec_fini(&exec); +put_gobj: + while (entry-- > 0) + drm_gem_object_put(gobj[entry]); + kfree(gobj); +free_syncobj_handles: + kfree(syncobj_handles); +free_bo_handles: + kfree(bo_handles); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index c3e04cdbb9e72..f72424248cc52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -37,7 +37,9 @@ struct amdgpu_userq_fence { */ spinlock_t lock; struct list_head link; + unsigned long fence_drv_array_count; struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence_driver **fence_drv_array; }; struct amdgpu_userq_fence_driver { @@ -52,6 +54,7 @@ struct amdgpu_userq_fence_driver { spinlock_t 
fence_list_lock; struct list_head fences; struct amdgpu_device *adev; + struct xarray *uq_fence_drv_xa_ref; char timeline_name[TASK_COMM_LEN]; }; @@ -65,5 +68,9 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_destroy(struct kref *ref); +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index bcd86c1b3a757..34c1297d79707 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -27,6 +27,32 @@ #include "amdgpu_userqueue.h" #include "amdgpu_userq_fence.h" +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long index; + + if (xa_empty(xa)) + return; + + xa_lock(xa); + xa_for_each(xa, index, fence_drv) { + __xa_erase(xa, index); + amdgpu_userq_fence_driver_put(fence_drv); + } + + xa_unlock(xa); +} + +static void +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) +{ + amdgpu_userq_walk_and_drop_fence_drv(&userq->uq_fence_drv_xa); + xa_destroy(&userq->uq_fence_drv_xa); + /* Drop the fence_drv reference held by user queue */ + amdgpu_userq_fence_driver_put(userq->fence_drv); +} + static void amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, @@ -36,7 +62,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; uq_funcs->mqd_destroy(uq_mgr, queue); - amdgpu_userq_fence_driver_put(queue->fence_drv); + amdgpu_userq_fence_driver_free(queue); idr_remove(&uq_mgr->userq_idr, queue_id); 
kfree(queue); } @@ -234,6 +260,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; + xa_init_flags(&queue->uq_fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, queue); if (r) { DRM_ERROR("Failed to alloc fence driver\n"); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 76a59c350c2a2..7df837fedce00 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -47,6 +47,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + struct xarray uq_fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; }; -- GitLab From 15e30a6e479282fef4365bd586159911c8cf140d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:33:28 +0530 Subject: [PATCH 164/899] drm/amdgpu: Add wait IOCTL timeline syncobj support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add user fence wait IOCTL timeline syncobj support. v2:(Christian) - handle dma_fence_wait() return value. - shorten the variable name syncobj_timeline_points a bit. - move num_points up to avoid padding issues. v3:(Christian) - Handle timeline drm_syncobj_find_fence() call error handling - Use dma_fence_unwrap_for_each() in timeline fence as there could be more than one fence. v4:(Christian) - Drop the first num_fences since fence is always included in the dma_fence_unwrap_for_each() iteration, when fence != f then fence is most likely just a container. v5: Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. 
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 91 +++++++++++++++++-- include/uapi/drm/amdgpu_drm.h | 16 +++- 2 files changed, 99 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8f9d2427d3800..1a9565b61266a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -474,11 +475,11 @@ free_syncobj_handles: int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles; + u32 num_syncobj, num_bo_handles, num_points; struct drm_amdgpu_userq_fence_info *fence_info = NULL; struct drm_amdgpu_userq_wait *wait_info = data; - u32 *syncobj_handles, *bo_handles; struct dma_fence **fences = NULL; - u32 num_syncobj, num_bo_handles; struct drm_gem_object **gobj; struct drm_exec exec; int r, i, entry, cnt; @@ -498,11 +499,26 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_bo_handles; } + num_points = wait_info->num_points; + timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), + sizeof(u32) * num_points); + if (IS_ERR(timeline_handles)) { + r = PTR_ERR(timeline_handles); + goto free_syncobj_handles; + } + + timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), + sizeof(u32) * num_points); + if (IS_ERR(timeline_points)) { + r = PTR_ERR(timeline_points); + goto free_timeline_handles; + } + /* Array of GEM object handles */ gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); if (!gobj) { r = -ENOMEM; - goto free_syncobj_handles; + goto free_timeline_points; } for (entry = 0; entry < num_bo_handles; entry++) { @@ -524,12 +540,34 @@ int 
amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } if (!wait_info->num_fences) { + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + dma_fence_unwrap_for_each(f, &iter, fence) + num_fences++; + + dma_fence_put(fence); + } + } + /* Count syncobj's fence */ for (i = 0; i < num_syncobj; i++) { struct dma_fence *fence; r = drm_syncobj_find_fence(filp, syncobj_handles[i], - 0, 0, &fence); + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); if (r) goto exec_fini; @@ -588,12 +626,41 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } } + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + dma_fence_unwrap_for_each(f, &iter, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + dma_fence_get(f); + fences[num_fences++] = f; + } + + dma_fence_put(fence); + } + } + /* Retrieve syncobj's fence */ for (i = 0; i < num_syncobj; i++) { struct dma_fence *fence; r = drm_syncobj_find_fence(filp, syncobj_handles[i], - 0, 0, &fence); + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); if (r) goto free_fences; @@ -616,9 +683,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, * Just waiting on other driver fences should * be good for now */ - dma_fence_wait(fences[i], false); - dma_fence_put(fences[i]); + r = dma_fence_wait(fences[i], true); + if (r) { + dma_fence_put(fences[i]); + goto free_fences; + } + dma_fence_put(fences[i]); continue; } @@ -664,6 +735,8 @@ int 
amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, drm_gem_object_put(gobj[i]); kfree(gobj); + kfree(timeline_points); + kfree(timeline_handles); kfree(syncobj_handles); kfree(bo_handles); @@ -681,6 +754,10 @@ put_gobj: while (entry-- > 0) drm_gem_object_put(gobj[entry]); kfree(gobj); +free_timeline_points: + kfree(timeline_points); +free_timeline_handles: + kfree(timeline_handles); free_syncobj_handles: kfree(syncobj_handles); free_bo_handles: diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e9414c0f9241..1a21259cb8c4a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -530,12 +530,26 @@ struct drm_amdgpu_userq_wait { * matching fence wait info pair in @userq_fence_info. */ __u32 bo_wait_flags; - __u32 pad; + /** + * @num_points: A count that represents the number of timeline syncobj handles in + * syncobj_handles_array. + */ + __u32 num_points; /** * @syncobj_handles_array: An array of syncobj handles defined to get the * fence wait information of every syncobj handles in the array. */ __u64 syncobj_handles_array; + /** + * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the + * fence wait information of every timeline syncobj handles in the array. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: An array of timeline syncobj points defined to get the + * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + */ + __u64 syncobj_timeline_points; /** * @bo_handles_array: An array of GEM BO handles defined to fetch the fence * wait information of every BO handles in the array. 
-- GitLab From 8949843762631d9d0fc526dfb61a272dca29fc6f Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:38:46 +0530 Subject: [PATCH 165/899] drm/amdgpu: Enable userq fence interrupt support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to handle the userqueue protected fence signal hardware interrupt. Create a xarray which maps the doorbell index to the fence driver address. This would help to retrieve the fence driver information when an userq fence interrupt is triggered. Firmware sends the doorbell offset value and this info is compared with the queue's mqd doorbell offset value. If they are same, we process the userq fence interrupt. v1:(Christian): - use xa_load to extract the fence driver. - move the amdgpu_userq_fence_driver_process call within the xa_lock as there is a chance that fence_drv might be freed. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 25 +++++++++---------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cfaa5d77b20ac..fdd271edb0b91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4336,6 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); + INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 1a9565b61266a..cd473c985e369 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -71,6 +71,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq) { struct amdgpu_userq_fence_driver *fence_drv; + unsigned long flags; int r; fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); @@ -98,6 +99,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, fence_drv->context = dma_fence_context_alloc(1); get_task_comm(fence_drv->timeline_name, current); + xa_lock_irqsave(&adev->userq_xa, flags); + __xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL); + xa_unlock_irqrestore(&adev->userq_xa, flags); + userq->fence_drv = fence_drv; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 62a6da083a8fe..b061b654d1ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -49,6 +49,7 @@ #include "nbio_v4_3.h" #include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -6334,25 +6335,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - 
spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; -- GitLab From 70773bef4e091ff6d2a91e3dfb4f29013eb81f1f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:09:49 +0200 Subject: [PATCH 166/899] drm/amdgpu: update userqueue BOs and PDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the VM_IOCTL to allow userspace to synchronize the mapping/unmapping of a BO in the page table. The major changes are: - it adds a drm_timeline object as an input parameter to the VM IOCTL. - this object is used by the kernel to sync the update of the BO in the page table during the mapping of the object. - the kernel also synchronizes the tlb flush of the page table entry of this object during the unmapping (Added in this series: https://patchwork.freedesktop.org/series/131276/ and https://patchwork.freedesktop.org/patch/584182/) - the userspace can wait on this timeline, and then the BO is ready to be consumed by the GPU. 
The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: - remove the eviction fence coupling V3: - added the drm timeline support instead of input/output fence (Christian) V4: - made timeline 64-bit (Christian) - bug fix (Arvind) V5: GLCTS bug fix (Arvind) V6: Rename syncobj_handle -> timeline_syncobj_out Rename point -> timeline_point_in (Marek) V7: Addressed review comments from Christian: - do not send last_update fence in case of vm_clear_freed, instead return the fence from gem_va_update_vm - move the functions to update bo_mapping to amdgpu_gem.c - do not use amdgpu_userq_update_vm anymore in userq_create() V8: Addressed review comments from Christian: - Split amdgpu_gem_update_bo_mapping function. - amdgpu_gem_va_update_vm should return stub for error. V9: Addressed review comments from Christian: - Rename the function amdgpu_gem_update_timeline_node. - amdgpu_gem_update_timeline_node should be void function. - when timeline_point is zero don't allocate a chain and call drm_syncobj_replace_fence() instead of drm_syncobj_add_point(). V11: rebase V12: Fix 32-bit holes issue in struct drm_amdgpu_gem_va.
V13: Fix the review comment by renaming timeline syncobj (Marek) Cc: Alex Deucher Cc: Felix Kuehling Cc: Christian König Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 113 ++++++++++++++++++++++-- include/uapi/drm/amdgpu_drm.h | 9 ++ 2 files changed, 113 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 69429df094771..542a1b70f2ee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_display.h" @@ -44,6 +45,75 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_update_timeline_node(struct drm_file *filp, + uint32_t syncobj_handle, + uint64_t point, + struct drm_syncobj **syncobj, + struct dma_fence_chain **chain) +{ + if (!syncobj_handle) + return 0; + + /* Find the sync object */ + *syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!*syncobj) + return -ENOENT; + + if (!point) + return 0; + + /* Allocate the chain node */ + *chain = dma_fence_chain_alloc(); + if (!*chain) { + drm_syncobj_put(*syncobj); + return -ENOMEM; + } + + return 0; +} + +static void +amdgpu_gem_update_bo_mapping(struct drm_file *filp, + struct amdgpu_bo_va *bo_va, + uint32_t operation, + uint64_t point, + struct dma_fence *fence, + struct drm_syncobj *syncobj, + struct dma_fence_chain *chain) +{ + struct amdgpu_bo *bo = bo_va ? 
bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct dma_fence *last_update; + + if (!syncobj) + return; + + /* Find the last update fence */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + last_update = fence; + break; + default: + return; + } + + /* Add fence to timeline */ + if (!point) + drm_syncobj_replace_fence(syncobj, last_update); + else + drm_syncobj_add_point(syncobj, chain, last_update, point); +} + static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -638,18 +708,23 @@ out: * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. + * + * Returns resulting fence if freed BO(s) got cleared from the PT. + * otherwise stub fence in case of error. 
*/ -static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_va *bo_va, - uint32_t operation) +static struct dma_fence * +amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va *bo_va, + uint32_t operation) { + struct dma_fence *fence = dma_fence_get_stub(); int r; if (!amdgpu_vm_ready(vm)) - return; + return fence; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r) goto error; @@ -665,6 +740,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); + + return fence; } /** @@ -713,6 +790,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; + struct drm_syncobj *timeline_syncobj = NULL; + struct dma_fence_chain *timeline_chain = NULL; + struct dma_fence *fence; struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; @@ -827,9 +907,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, - args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + args->operation); + + if (!r) + amdgpu_gem_update_bo_mapping(filp, bo_va, + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + } error: drm_exec_fini(&exec); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a21259cb8c4a..ca82935ff93aa 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -857,6 
+857,15 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; }; #define AMDGPU_HW_IP_GFX 0 -- GitLab From ac4a1f7f13309f8235a0c4719c34094de3303dfd Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:46:49 +0530 Subject: [PATCH 167/899] drm/amdgpu: Remove the MES self test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove MES self test as this conflicts the userqueue fence interrupts. v2:(Christian) - remove the amdgpu_mes_self_test() function and any now unused code. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 169 --------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 - drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 14 +- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 13 +- 5 files changed, 2 insertions(+), 199 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fdd271edb0b91..4da18ed0beeae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5144,9 +5144,6 @@ exit: } adev->in_suspend = false; - if (adev->enable_mes) - amdgpu_mes_self_test(adev); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f8202e6ed4b15..acdca3110c248 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1405,175 +1405,6 @@ 
out_unlock: return r; } -static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, - int pasid, int *gang_id, - int queue_type, int num_queue, - struct amdgpu_ring **added_rings, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang_properties gprops = {0}; - int r, j; - - /* create a gang for the process */ - gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.gang_quantum = adev->mes.default_gang_quantum; - gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - - r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id); - if (r) { - DRM_ERROR("failed to add gang\n"); - return r; - } - - /* create queues for the gang */ - for (j = 0; j < num_queue; j++) { - r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j, - ctx_data, &ring); - if (r) { - DRM_ERROR("failed to add ring\n"); - break; - } - - DRM_INFO("ring %s was added\n", ring->name); - added_rings[j] = ring; - } - - return 0; -} - -static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) -{ - struct amdgpu_ring *ring; - int i, r; - - for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) { - ring = added_rings[i]; - if (!ring) - continue; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - - r = amdgpu_ring_test_ib(ring, 1000 * 10); - if (r) { - DRM_DEV_ERROR(ring->adev->dev, - "ring %s ib test failed (%d)\n", - ring->name, r); - return r; - } else - DRM_INFO("ring %s ib test pass\n", ring->name); - } - - return 0; -} - -int amdgpu_mes_self_test(struct amdgpu_device *adev) -{ - struct amdgpu_vm *vm = NULL; - struct amdgpu_mes_ctx_data ctx_data = {0}; - struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL }; - int gang_ids[3] = {0}; - int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 }, - { AMDGPU_RING_TYPE_COMPUTE, 1 }, - { AMDGPU_RING_TYPE_SDMA, 1} }; - int i, r, pasid, k = 
0; - - pasid = amdgpu_pasid_alloc(16); - if (pasid < 0) { - dev_warn(adev->dev, "No more PASIDs available!"); - pasid = 0; - } - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) { - r = -ENOMEM; - goto error_pasid; - } - - r = amdgpu_vm_init(adev, vm, -1); - if (r) { - DRM_ERROR("failed to initialize vm\n"); - goto error_pasid; - } - - r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); - if (r) { - DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_fini; - } - - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); - if (r) { - DRM_ERROR("failed to map ctx meta data\n"); - goto error_vm; - } - - r = amdgpu_mes_create_process(adev, pasid, vm); - if (r) { - DRM_ERROR("failed to create MES process\n"); - goto error_vm; - } - - for (i = 0; i < ARRAY_SIZE(queue_types); i++) { - /* On GFX v10.3, fw hasn't supported to map sdma queue. */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= - IP_VERSION(10, 3, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < - IP_VERSION(11, 0, 0) && - queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) - continue; - - r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, - &gang_ids[i], - queue_types[i][0], - queue_types[i][1], - &added_rings[k], - &ctx_data); - if (r) - goto error_queues; - - k += queue_types[i][1]; - } - - /* start ring test and ib test for MES queues */ - amdgpu_mes_test_queues(added_rings); - -error_queues: - /* remove all queues */ - for (i = 0; i < ARRAY_SIZE(added_rings); i++) { - if (!added_rings[i]) - continue; - amdgpu_mes_remove_ring(adev, added_rings[i]); - } - - for (i = 0; i < ARRAY_SIZE(gang_ids); i++) { - if (!gang_ids[i]) - continue; - amdgpu_mes_remove_gang(adev, gang_ids[i]); - } - - amdgpu_mes_destroy_process(adev, pasid); - -error_vm: - amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); - -error_fini: - amdgpu_vm_fini(adev, vm); - -error_pasid: - if (pasid) - amdgpu_pasid_free(pasid); - - amdgpu_mes_ctx_free_meta_data(&ctx_data); - kfree(vm); - 
return 0; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 52dd54a32fb47..78362a8382122 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -470,8 +470,6 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_self_test(struct amdgpu_device *adev); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 4cfd86aa2ea3d..ccc19a40f03d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1712,22 +1712,10 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .name = "mes_v11_0", .early_init = mes_v11_0_early_init, - .late_init = mes_v11_0_late_init, + .late_init = NULL, .sw_init = mes_v11_0_sw_init, .sw_fini = mes_v11_0_sw_fini, .hw_init = mes_v11_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 62aba0b5dbe2a..801928555effb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1820,21 +1820,10 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int 
mes_v12_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v12_0_ip_funcs = { .name = "mes_v12_0", .early_init = mes_v12_0_early_init, - .late_init = mes_v12_0_late_init, + .late_init = NULL, .sw_init = mes_v12_0_sw_init, .sw_fini = mes_v12_0_sw_fini, .hw_init = mes_v12_0_hw_init, -- GitLab From e7cf21fbb2773cfcf70189be524041b21e6509dc Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:51:58 +0530 Subject: [PATCH 168/899] drm/amdgpu: Few optimization and fixes for userq fence driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few optimizations and fixes for the userq fence driver. v1:(Christian): - Remove unnecessary comments. - In the drm_exec_init() call, pass num_bo_handles as the last parameter; this makes allocation of the array more efficient. - Handle return value of __xa_store() and improve the error handling of amdgpu_userq_fence_driver_alloc(). v2:(Christian): - Revert userq_xa xarray init to XA_FLAGS_LOCK_IRQ. - Move the xa_unlock before the error check of the xa_err(__xa_store()) call, and move this change to a separate patch since it adds missing error handling. - Removed the unnecessary comments.
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 44 ++++++++++++------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 +-- .../gpu/drm/amd/include/amdgpu_userqueue.h | 2 +- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index cd473c985e369..b475643ab5ecc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -77,7 +77,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); if (!fence_drv) { DRM_ERROR("Failed to allocate memory for fence driver\n"); - return -ENOMEM; + r = -ENOMEM; + goto free_fence_drv; } /* Acquire seq64 memory */ @@ -85,7 +86,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, &fence_drv->cpu_addr); if (r) { kfree(fence_drv); - return -ENOMEM; + r = -ENOMEM; + goto free_seq64; } memset(fence_drv->cpu_addr, 0, sizeof(u64)); @@ -95,7 +97,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, spin_lock_init(&fence_drv->fence_list_lock); fence_drv->adev = adev; - fence_drv->uq_fence_drv_xa_ref = &userq->uq_fence_drv_xa; + fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa; fence_drv->context = dma_fence_context_alloc(1); get_task_comm(fence_drv->timeline_name, current); @@ -107,6 +109,13 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, userq->fence_drv = fence_drv; return 0; + +free_seq64: + amdgpu_seq64_free(adev, fence_drv->gpu_addr); +free_fence_drv: + kfree(fence_drv); + + return r; } void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) @@ -148,7 +157,7 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) struct amdgpu_device *adev = fence_drv->adev; struct 
amdgpu_userq_fence *fence, *tmp; struct xarray *xa = &adev->userq_xa; - unsigned long index; + unsigned long index, flags; struct dma_fence *f; spin_lock(&fence_drv->fence_list_lock); @@ -165,11 +174,11 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) } spin_unlock(&fence_drv->fence_list_lock); - xa_lock(xa); + xa_lock_irqsave(xa, flags); xa_for_each(xa, index, xa_fence_drv) if (xa_fence_drv == fence_drv) __xa_erase(xa, index); - xa_unlock(xa); + xa_unlock_irqrestore(xa, flags); /* Free seq64 memory */ amdgpu_seq64_free(adev, fence_drv->gpu_addr); @@ -213,12 +222,12 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, amdgpu_userq_fence_driver_get(fence_drv); dma_fence_get(fence); - if (!xa_empty(&userq->uq_fence_drv_xa)) { + if (!xa_empty(&userq->fence_drv_xa)) { struct amdgpu_userq_fence_driver *stored_fence_drv; unsigned long index, count = 0; int i = 0; - xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) count++; userq_fence->fence_drv_array = @@ -227,9 +236,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, GFP_KERNEL); if (userq_fence->fence_drv_array) { - xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) { + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { userq_fence->fence_drv_array[i] = stored_fence_drv; - xa_erase(&userq->uq_fence_drv_xa, index); + xa_erase(&userq->fence_drv_xa, index); i++; } } @@ -379,7 +388,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_exec exec; u64 wptr; - /* Array of syncobj handles */ num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles_array), sizeof(u32) * num_syncobj_handles); @@ -401,7 +409,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - /* Array of bo handles */ num_bo_handles = args->num_bo_handles; bo_handles = 
memdup_user(u64_to_user_ptr(args->bo_handles_array), sizeof(u32) * num_bo_handles); @@ -423,7 +430,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles); + + /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1); drm_exec_retry_on_contention(&exec); @@ -520,7 +529,6 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_timeline_handles; } - /* Array of GEM object handles */ gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); if (!gobj) { r = -ENOMEM; @@ -535,7 +543,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles); + + /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0); drm_exec_retry_on_contention(&exec); @@ -706,8 +716,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, * Otherwise, we would gather those references until we don't * have any more space left and crash. 
*/ - if (fence_drv->uq_fence_drv_xa_ref) { - r = xa_alloc(fence_drv->uq_fence_drv_xa_ref, &index, fence_drv, + if (fence_drv->fence_drv_xa_ptr) { + r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv, xa_limit_32b, GFP_KERNEL); if (r) goto free_fences; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index f72424248cc52..89c82ba38b504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -54,7 +54,7 @@ struct amdgpu_userq_fence_driver { spinlock_t fence_list_lock; struct list_head fences; struct amdgpu_device *adev; - struct xarray *uq_fence_drv_xa_ref; + struct xarray *fence_drv_xa_ptr; char timeline_name[TASK_COMM_LEN]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 34c1297d79707..15c568fb062b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -47,8 +47,8 @@ static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) static void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) { - amdgpu_userq_walk_and_drop_fence_drv(&userq->uq_fence_drv_xa); - xa_destroy(&userq->uq_fence_drv_xa); + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); + xa_destroy(&userq->fence_drv_xa); /* Drop the fence_drv reference held by user queue */ amdgpu_userq_fence_driver_put(userq->fence_drv); } @@ -260,7 +260,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; - xa_init_flags(&queue->uq_fence_drv_xa, XA_FLAGS_ALLOC); + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, queue); if (r) { DRM_ERROR("Failed to alloc fence driver\n"); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 7df837fedce00..b942f3f5ea353 100644 --- 
a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -47,7 +47,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; - struct xarray uq_fence_drv_xa; + struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; }; -- GitLab From fbea3d3174f4215ca1c867a1c035c1fa5ad01015 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:55:22 +0530 Subject: [PATCH 169/899] drm/amdgpu: Add the missing error handling for xa_store() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing error handling for xa_store() call in the function amdgpu_userq_fence_driver_alloc(). Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index b475643ab5ecc..969a3a75d8152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -102,9 +102,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, get_task_comm(fence_drv->timeline_name, current); xa_lock_irqsave(&adev->userq_xa, flags); - __xa_store(&adev->userq_xa, userq->doorbell_index, - fence_drv, GFP_KERNEL); + r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL)); xa_unlock_irqrestore(&adev->userq_xa, flags); + if (r) + goto free_seq64; userq->fence_drv = fence_drv; -- GitLab From d8675102ba322a4ccd0dd8bc5adf799246314ddf Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:56:57 +0530 Subject: [PATCH 170/899] drm/amdgpu: add vm root BO lock before accessing the vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Add a vm root BO lock before accessing the userqueue VM. v1:(Christian) - Keep the VM locked until you are done with the mapping. - Grab a temporary BO reference, drop the VM lock and acquire the BO. When you are done with everything just drop the BO lock and then the temporary BO reference. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 969a3a75d8152..6c9346f822c19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -321,7 +321,6 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { /** * amdgpu_userq_fence_read_wptr - Read the userq wptr value * - * @filp: drm file private data structure * @queue: user mode queue structure pointer * @wptr: write pointer value * @@ -331,25 +330,29 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { * * Returns wptr value on success, error on failure. 
*/ -static int amdgpu_userq_fence_read_wptr(struct drm_file *filp, - struct amdgpu_usermode_queue *queue, +static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue, u64 *wptr) { - struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo *bo; u64 addr, *ptr; int r; + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) + return r; + addr = queue->userq_prop->wptr_gpu_addr; addr &= AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(vm, addr >> PAGE_SHIFT); - if (!mapping) + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); return -EINVAL; + } - bo = mapping->bo_va->base.bo; + bo = amdgpu_bo_ref(mapping->bo_va->base.bo); + amdgpu_bo_unreserve(queue->vm->root.bo); r = amdgpu_bo_reserve(bo, true); if (r) { DRM_ERROR("Failed to reserve userqueue wptr bo"); @@ -366,11 +369,14 @@ static int amdgpu_userq_fence_read_wptr(struct drm_file *filp, amdgpu_bo_kunmap(bo); amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return 0; map_error: amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; } @@ -449,7 +455,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, goto exec_fini; } - r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr); + r = amdgpu_userq_fence_read_wptr(queue, &wptr); if (r) goto exec_fini; -- GitLab From cb4a73f46f253b5f7a30b1e0488c8ef2832e8747 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:59:04 +0530 Subject: [PATCH 171/899] drm/amdgpu: Add separate array of read and write for BO handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop AMDGPU_USERQ_BO_WRITE, as this should not be a global option of the IOCTL; it should be a per-buffer option. Hence add separate arrays for read and write BO handles.
v2(Marek): - Internal kernel details shouldn't be here. This file should only document the observed behavior, not the implementation . v3: - Fix DAL CI clang issue. v4: - Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Acked-by: Christian König Suggested-by: Marek Olšák Suggested-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 246 +++++++++++++----- include/uapi/drm/amdgpu_drm.h | 50 ++-- 2 files changed, 215 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 6c9346f822c19..9f1ca86593356 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -386,12 +386,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; struct drm_amdgpu_userq_signal *args = data; + struct drm_gem_object **gobj_write = NULL; + struct drm_gem_object **gobj_read = NULL; struct amdgpu_usermode_queue *queue; - struct drm_gem_object **gobj = NULL; struct drm_syncobj **syncobj = NULL; + u32 *bo_handles_write, num_write_bo_handles; u32 *syncobj_handles, num_syncobj_handles; - u32 *bo_handles, num_bo_handles; - int r, i, entry, boentry; + u32 *bo_handles_read, num_read_bo_handles; + int r, i, entry, rentry, wentry; struct dma_fence *fence; struct drm_exec exec; u64 wptr; @@ -417,32 +419,63 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - num_bo_handles = args->num_bo_handles; - bo_handles = memdup_user(u64_to_user_ptr(args->bo_handles_array), - sizeof(u32) * num_bo_handles); - if (IS_ERR(bo_handles)) + num_read_bo_handles = args->num_read_bo_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), + 
sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) { + r = PTR_ERR(bo_handles_read); goto free_syncobj; + } + + /* Array of pointers to the GEM read objects */ + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_bo_handles_read; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + num_write_bo_handles = args->num_write_bo_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto put_gobj_read; + } - /* Array of pointers to the GEM objects */ - gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); - if (!gobj) { + /* Array of pointers to the GEM write objects */ + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { r = -ENOMEM; - goto free_bo_handles; + goto free_bo_handles_write; } - for (boentry = 0; boentry < num_bo_handles; boentry++) { - gobj[boentry] = drm_gem_object_lookup(filp, bo_handles[boentry]); - if (!gobj[boentry]) { + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { r = -ENOENT; - goto put_gobj; + goto put_gobj_write; } } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) + goto exec_fini; + + r = 
drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); if (r) goto exec_fini; @@ -464,10 +497,21 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, if (r) goto exec_fini; - for (i = 0; i < num_bo_handles; i++) - dma_resv_add_fence(gobj[i]->resv, fence, - dma_resv_usage_rw(args->bo_flags & - AMDGPU_USERQ_BO_WRITE)); + for (i = 0; i < num_read_bo_handles; i++) { + if (!gobj_read || !gobj_read[i]->resv) + continue; + + dma_resv_add_fence(gobj_read[i]->resv, fence, + DMA_RESV_USAGE_READ); + } + + for (i = 0; i < num_write_bo_handles; i++) { + if (!gobj_write || !gobj_write[i]->resv) + continue; + + dma_resv_add_fence(gobj_write[i]->resv, fence, + DMA_RESV_USAGE_WRITE); + } /* Add the created fence to syncobj/BO's */ for (i = 0; i < num_syncobj_handles; i++) @@ -478,12 +522,18 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, exec_fini: drm_exec_fini(&exec); -put_gobj: - while (boentry-- > 0) - drm_gem_object_put(gobj[boentry]); - kfree(gobj); -free_bo_handles: - kfree(bo_handles); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +free_bo_handles_write: + kfree(bo_handles_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_bo_handles_read: + kfree(bo_handles_read); free_syncobj: while (entry-- > 0) if (syncobj[entry]) @@ -498,28 +548,37 @@ free_syncobj_handles: int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles; - u32 num_syncobj, num_bo_handles, num_points; + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; + u32 num_syncobj, num_read_bo_handles, num_write_bo_handles, num_points; struct drm_amdgpu_userq_fence_info *fence_info = NULL; struct drm_amdgpu_userq_wait *wait_info = data; + struct drm_gem_object 
**gobj_write; + struct drm_gem_object **gobj_read; struct dma_fence **fences = NULL; - struct drm_gem_object **gobj; + int r, i, rentry, wentry, cnt; struct drm_exec exec; - int r, i, entry, cnt; u64 num_fences = 0; - num_bo_handles = wait_info->num_bo_handles; - bo_handles = memdup_user(u64_to_user_ptr(wait_info->bo_handles_array), - sizeof(u32) * num_bo_handles); - if (IS_ERR(bo_handles)) - return PTR_ERR(bo_handles); + num_read_bo_handles = wait_info->num_read_bo_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) + return PTR_ERR(bo_handles_read); + + num_write_bo_handles = wait_info->num_write_bo_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto free_bo_handles_read; + } num_syncobj = wait_info->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array), sizeof(u32) * num_syncobj); if (IS_ERR(syncobj_handles)) { r = PTR_ERR(syncobj_handles); - goto free_bo_handles; + goto free_bo_handles_write; } num_points = wait_info->num_points; @@ -537,29 +596,51 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_timeline_handles; } - gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL); - if (!gobj) { + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { r = -ENOMEM; goto free_timeline_points; } - for (entry = 0; entry < num_bo_handles; entry++) { - gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]); - if (!gobj[entry]) { + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + gobj_write = kmalloc_array(num_write_bo_handles, 
sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto put_gobj_read; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { r = -ENOENT; - goto put_gobj; + goto put_gobj_write; } } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0); + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); if (r) { drm_exec_fini(&exec); - goto put_gobj; + goto put_gobj_write; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; } } @@ -600,13 +681,21 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } /* Count GEM objects fence */ - for (i = 0; i < num_bo_handles; i++) { + for (i = 0; i < num_read_bo_handles; i++) { struct dma_resv_iter resv_cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv, - dma_resv_usage_rw(wait_info->bo_wait_flags & - AMDGPU_USERQ_BO_WRITE), fence) + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) + num_fences++; + } + + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) num_fences++; } @@ -632,14 +721,30 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_fence_info; } - /* Retrieve GEM objects fence */ - for (i = 0; i < num_bo_handles; i++) { + /* Retrieve GEM read objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + 
struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + /* Retrieve GEM write objects fence */ + for (i = 0; i < num_write_bo_handles; i++) { struct dma_resv_iter resv_cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv, - dma_resv_usage_rw(wait_info->bo_wait_flags & - AMDGPU_USERQ_BO_WRITE), fence) { + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) { if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { r = -EINVAL; goto free_fences; @@ -755,14 +860,19 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } drm_exec_fini(&exec); - for (i = 0; i < num_bo_handles; i++) - drm_gem_object_put(gobj[i]); - kfree(gobj); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); + kfree(gobj_read); + + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); + kfree(gobj_write); kfree(timeline_points); kfree(timeline_handles); kfree(syncobj_handles); - kfree(bo_handles); + kfree(bo_handles_write); + kfree(bo_handles_read); return 0; @@ -774,18 +884,24 @@ free_fence_info: kfree(fence_info); exec_fini: drm_exec_fini(&exec); -put_gobj: - while (entry-- > 0) - drm_gem_object_put(gobj[entry]); - kfree(gobj); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); free_timeline_points: kfree(timeline_points); free_timeline_handles: kfree(timeline_handles); free_syncobj_handles: kfree(syncobj_handles); -free_bo_handles: - kfree(bo_handles); +free_bo_handles_write: + kfree(bo_handles_write); +free_bo_handles_read: + kfree(bo_handles_read); 
return r; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ca82935ff93aa..02cf03e811d5f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -452,9 +452,6 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; -/* dma_resv usage flag */ -#define AMDGPU_USERQ_BO_WRITE 1 - /* userq signal/wait ioctl */ struct drm_amdgpu_userq_signal { /** @@ -484,20 +481,30 @@ struct drm_amdgpu_userq_signal { */ __u64 syncobj_point; /** - * @bo_handles_array: An array of GEM BO handles used by the userq fence creation - * IOCTL to install the created dma_fence object which can be utilized by - * userspace to synchronize the BO usage between user processes. + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. */ - __u64 bo_handles_array; + __u32 num_read_bo_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. + * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; /** * @bo_flags: flags to indicate BOs synchronize for READ or WRITE */ __u32 bo_flags; + __u32 pad; }; struct drm_amdgpu_userq_fence_info { @@ -551,20 +558,31 @@ struct drm_amdgpu_userq_wait { */ __u64 syncobj_timeline_points; /** - * @bo_handles_array: An array of GEM BO handles defined to fetch the fence - * wait information of every BO handles in the array. + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. 
*/ - __u64 bo_handles_array; + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in * @syncobj_handles_array. */ __u32 num_syncobj_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_read_bo_handles; + /** + * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; + __u32 pad; /** * @userq_fence_info: An array of fence information (va and value) pair of each * objects stored in @syncobj_handles_array and @bo_handles_array. -- GitLab From f7cb6a28e172bd470803d64697f7a9c708609da2 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 11:19:26 +0530 Subject: [PATCH 172/899] drm/amdgpu: Add gpu_addr support to seq64 allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add gpu address support to seq64 alloc function. v1:(Christian) - Add the user of this new interface change to the same patch. 
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 10 ++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 1 + 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index e22cb2b5cd926..0defad71044c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -163,7 +163,8 @@ error: * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, + u64 *gpu_addr, u64 **cpu_addr) { unsigned long bit_pos; @@ -172,7 +173,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) return -ENOSPC; __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + + if (gpu_addr) + *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr; + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; @@ -233,7 +239,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) */ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->seq64.sbo, NULL, + &adev->seq64.sbo, &adev->seq64.gpu_addr, (void **)&adev->seq64.cpu_base_addr); if (r) { dev_warn(adev->dev, "(%d) create seq64 failed\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 4203b2ab318df..26a249aaaee15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -32,13 +32,14 @@ struct amdgpu_seq64 { struct amdgpu_bo *sbo; u32 num_sem; + u64 gpu_addr; u64 *cpu_base_addr; DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); }; void 
amdgpu_seq64_fini(struct amdgpu_device *adev); int amdgpu_seq64_init(struct amdgpu_device *adev); -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 9f1ca86593356..d7697d3f55e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -82,7 +82,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, } /* Acquire seq64 memory */ - r = amdgpu_seq64_alloc(adev, &fence_drv->gpu_addr, + r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, &fence_drv->cpu_addr); if (r) { kfree(fence_drv); @@ -113,7 +113,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, return 0; free_seq64: - amdgpu_seq64_free(adev, fence_drv->gpu_addr); + amdgpu_seq64_free(adev, fence_drv->va); free_fence_drv: kfree(fence_drv); @@ -183,7 +183,7 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref) xa_unlock_irqrestore(xa, flags); /* Free seq64 memory */ - amdgpu_seq64_free(adev, fence_drv->gpu_addr); + amdgpu_seq64_free(adev, fence_drv->va); kfree(fence_drv); } @@ -839,7 +839,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } /* Store drm syncobj's gpu va address and value */ - fence_info[cnt].va = fence_drv->gpu_addr; + fence_info[cnt].va = fence_drv->va; fence_info[cnt].value = fences[i]->seqno; dma_fence_put(fences[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index 89c82ba38b504..f1a90840ac1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -44,6 +44,7 @@ struct amdgpu_userq_fence { struct amdgpu_userq_fence_driver { struct kref refcount; + u64 va; u64 gpu_addr; u64 *cpu_addr; u64 context; -- GitLab From 189ee986b0142c8176aa09c47e66b611ca29d731 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 11:26:37 +0530 Subject: [PATCH 173/899] drm/amdgpu: add userq specific kernel config for fence ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the user queue fence signal and wait IOCTLs in the kernel config CONFIG_DRM_AMDGPU_NAVI3X_USERQ. v2(Christian): - Remove the userq specific config added for kernel queues fence init function. v3(Alex): - It will be better to return an error(-ENOTSUPP) in these cases. Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index d7697d3f55e5a..85af0d5200926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -318,6 +318,7 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = { .release = amdgpu_userq_fence_release, }; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /** * amdgpu_userq_fence_read_wptr - Read the userq wptr value * @@ -544,7 +545,15 @@ free_syncobj_handles: return r; } +#else +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + return -ENOTSUPP; +} +#endif +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -905,3 +914,10 @@ free_bo_handles_read: return r; } +#else +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + return -ENOTSUPP; +} +#endif -- 
GitLab From 38c67ec9aa4be7bc0ae55e7bebe95f615fa09a1e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:10:41 +0200 Subject: [PATCH 174/899] drm/amdgpu: Add input fence to sync bo map/unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds input fences to VM_IOCTL for buffer object. The kernel will map/unmap the BO only when the fence is signaled. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: Bug fix (Arvind) V3: Bug fix (Arvind) V4: Rename UAPI objects as per UAPI review (Marek) V5: Addressed review comments from Christian - function should return error. - Add 'TODO' comment - The input fence should be independent of the operation. V6: Addressed review comments from Christian - Release the memory allocated by memdup_user(). V7: Addressed review comments from Christian - Drop the debug print and add "return r;" for the error handling. V11: Rebase v12: Fix 32-bit holes issue in struct drm_amdgpu_gem_va. v13: Fix deadlock issue. v14: Fix merge conflict. v15: Fix review comment by renaming syncobj handles.
Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 46 +++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 4 +++ 2 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 542a1b70f2ee9..bdf46cb4327fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -45,6 +45,45 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_add_input_fence(struct drm_file *filp, + uint64_t syncobj_handles_array, + uint32_t num_syncobj_handles) +{ + struct dma_fence *fence; + uint32_t *syncobj_handles; + int ret, i; + + if (!num_syncobj_handles) + return 0; + + syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), + sizeof(uint32_t) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + for (i = 0; i < num_syncobj_handles; i++) { + + if (!syncobj_handles[i]) { + ret = -EINVAL; + goto free_memdup; + } + + ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence); + if (ret) + goto free_memdup; + + dma_fence_wait(fence, false); + + /* TODO: optimize async handling */ + dma_fence_put(fence); + } + +free_memdup: + kfree(syncobj_handles); + return ret; +} + static int amdgpu_gem_update_timeline_node(struct drm_file *filp, uint32_t syncobj_handle, @@ -854,6 +893,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + r = amdgpu_gem_add_input_fence(filp, + args->input_fence_syncobj_handles, + args->num_syncobj_handles); + if (r) + goto error_put_gobj; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -928,6 +973,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, error: drm_exec_fini(&exec); 
+error_put_gobj: drm_gem_object_put(gobj); return r; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 02cf03e811d5f..0910a6f8c5f2c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -884,6 +884,10 @@ struct drm_amdgpu_gem_va { * at vm_timeline_point. */ __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 -- GitLab From 5f2f78314c5c3e4d87d638eea5a9592e89947e9e Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 28 Nov 2023 19:52:30 +0530 Subject: [PATCH 175/899] Revert "drm/amdgpu: don't allow userspace to create a doorbell BO" This reverts commit 6be2ad4f0073c541146caa66c5ae936c955a8224. This patch was to block userspace to use doorbell manager UAPI until usermode queue UAPI gets approved. UQ UAPI got approved in the following MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index bdf46cb4327fb..06171eab6e923 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -430,10 +430,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; - /* reject DOORBELLs until userspace code to use it is available */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) - return -EINVAL; - /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | -- GitLab From 2e06b175fff5ba1415b74fed441c0d066b8c8dab Mon Sep 17 00:00:00 2001 From: 
Shashank Sharma Date: Mon, 11 Nov 2024 12:34:30 +0100 Subject: [PATCH 176/899] drm/amdgpu: fix userqueue UAPI comments This patch fixes some of the pending UAPI review comments from the libDRM/UAPI review process. - It updates some outdated comments in the userqueue UAPI header highlighted during the libdrm UAPI review. - It removes the GDS BO support which was found unused. - It also removes the unused flags parameter from the UAPI. - It also adds padding variables in userqueue in/out structures. (Pierre-Eric and Marek) - clarify comments on top of drm_amdgpu_userq_in - clarify comment for queue_id (in) - clarify comment for mqd - clarify comment for compute MQD size - clarify comment for queue_id (out) - remove GDS object from BO object list - remove the unused flags parameter Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 4 +- include/uapi/drm/amdgpu_drm.h | 54 +++++++++---------- 3 files changed, 26 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 15c568fb062b1..e946c6d1dd6be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -225,11 +225,6 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) return -EINVAL; } - if (args->in.flags) { - DRM_ERROR("Usermode queue flags not supported yet\n"); - return -EINVAL; - } - mutex_lock(&uq_mgr->userq_mutex); uq_funcs = adev->userq_funcs[args->in.ip_type]; @@ -248,7 +243,6 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) queue->doorbell_handle = args->in.doorbell_handle; queue->doorbell_index = args->in.doorbell_offset; queue->queue_type = args->in.ip_type; - queue->flags = args->in.flags; queue->vm =
&fpriv->vm; /* Convert relative doorbell offset into absolute doorbell index */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index b3aa49ff1a872..fe4efe5ba6acc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -201,8 +201,8 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC; mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va); - mqd->gds_bkup_base_lo = mqd_gfx_v11->gds_va & 0xFFFFFFFC; - mqd->gds_bkup_base_hi = upper_32_bits(mqd_gfx_v11->gds_va); + mqd->gds_bkup_base_lo = 0; + mqd->gds_bkup_base_hi = 0; mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0910a6f8c5f2c..6158496cc1d0d 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -325,35 +325,28 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; -/* user queue IOCTL */ +/* user queue IOCTL operations */ #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 -/* Flag to indicate secure buffer related workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) -/* Flag to indicate AQL workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) - /* - * MQD (memory queue descriptor) is a set of parameters which allow - * the GPU to uniquely define and identify a usermode queue. This - * structure defines the MQD for GFX-V11 IP ver 0. + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. 
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. */ struct drm_amdgpu_userq_in { /** AMDGPU_USERQ_OP_* */ __u32 op; - /** Queue handle for USERQ_OP_FREE */ + /** Queue id passed for operation USERQ_OP_FREE */ __u32 queue_id; /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ __u32 ip_type; - /** - * @flags: flags to indicate special function for queue like secure - * buffer (TMZ). Unused for now. - */ - __u32 flags; /** * @doorbell_handle: the handle of doorbell GEM object - * associated to this client. + * associated with this userqueue client. */ __u32 doorbell_handle; /** @@ -362,7 +355,7 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - + __u32 _pad; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. @@ -387,25 +380,31 @@ struct drm_amdgpu_userq_in { */ __u64 wptr_va; /** - * @mqd: Queue descriptor for USERQ_OP_CREATE + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * * MQD data can be of different size for different GPU IP/engine and * their respective versions/revisions, so this points to a __u64 * - * which holds MQD of this usermode queue. + * which holds IP specific MQD of this usermode queue. */ __u64 mqd; /** * @size: size of MQD data in bytes, it must match the MQD structure * size of the respective engine/revision defined in UAPI for ex, for - * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). 
*/ __u64 mqd_size; }; +/* The structure to carry output of userqueue ops */ struct drm_amdgpu_userq_out { - /** Queue handle */ + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ __u32 queue_id; - /** Flags */ - __u32 flags; + __u32 _pad; }; union drm_amdgpu_userq { @@ -420,11 +419,6 @@ struct drm_amdgpu_userq_mqd_gfx11 { * Use AMDGPU_INFO_IOCTL to find the exact size of the object. */ __u64 shadow_va; - /** - * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. - * Use AMDGPU_INFO_IOCTL to find the exact size of the object. - */ - __u64 gds_va; /** * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. * Use AMDGPU_INFO_IOCTL to find the exact size of the object. @@ -446,8 +440,8 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { struct drm_amdgpu_userq_mqd_compute_gfx11 { /** * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. - * This must be a from a separate GPU object, and must be at least 1 page - * sized. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. */ __u64 eop_va; }; -- GitLab From d9e697f19bda777a7934e3bbdc67889b06d58019 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 30 Oct 2024 15:32:27 +0100 Subject: [PATCH 177/899] drm/amdgpu: bypass SRIOV check for shadow size info Currently, the shadow FW space size and alignment information is protected under a flag (adev->gfx.cp_gfx_shadow) which gets set only in case of SRIOV setups. if (amdgpu_sriov_vf(adev)) adev->gfx.cp_gfx_shadow = true; But we need this information for GFX Userqueues, so that user can create these objects while creating userqueue. This patch series creates a method to get this information bypassing the dependency on this check. 
This patch: - adds a new input parameter flag to the gfx.funcs->get_gfx_shadow_info fptr definition, so that it can accommodate the information without the check (adev->gfx.cp_gfx_shadow) on request. - updates the existing definition of amdgpu_gfx_get_gfx_shadow_info to adjust with this new flag. Next patch in the series is adding a UAPI which will consume this info. V2: split this patch from the new UAPI patch Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 5d70b3ae3fe1e..319e6e547c734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -305,7 +305,8 @@ struct amdgpu_gfx_funcs { void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, - struct amdgpu_gfx_shadow_info *shadow_info); + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, @@ -503,7 +504,7 @@ struct amdgpu_gfx_ras_mem_id_entry { #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) -#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) +#define 
amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false)) /** * amdgpu_gfx_create_bitmask - create a bitmask diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b061b654d1ec2..933c4ad5eff9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1086,14 +1086,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, #define MQD_FWWORKAREA_SIZE 484 #define MQD_FWWORKAREA_ALIGNMENT 256 -static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { - if (adev->gfx.cp_gfx_shadow) { - shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; - shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; - shadow_info->csa_size = MQD_FWWORKAREA_SIZE; - shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); return 0; } else { memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); -- GitLab From 2761bb9a31f1b863037547d73dc6aac1461ceab6 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 11 Nov 2024 12:43:07 +0530 Subject: [PATCH 178/899] drm/amdgpu: Modify userq signal/wait struct field names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify kernel UAPI userq signal/wait struct field names and description corresponding to the libdrm UAPI review comments. 
libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 20 ++-- include/uapi/drm/amdgpu_drm.h | 102 +++++++----------- 2 files changed, 46 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 85af0d5200926..6157a540c9297 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -400,7 +400,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, u64 wptr; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles_array), + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), sizeof(u32) * num_syncobj_handles); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); @@ -420,7 +420,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - num_read_bo_handles = args->num_read_bo_handles; + num_read_bo_handles = args->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), sizeof(u32) * num_read_bo_handles); if (IS_ERR(bo_handles_read)) { @@ -443,7 +443,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - num_write_bo_handles = args->num_write_bo_handles; + num_write_bo_handles = args->num_bo_write_handles; bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), sizeof(u32) * num_write_bo_handles); if (IS_ERR(bo_handles_write)) { @@ -558,23 +558,23 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; - u32 num_syncobj, num_read_bo_handles, num_write_bo_handles, num_points; + u32 num_syncobj, num_read_bo_handles, num_write_bo_handles; 
struct drm_amdgpu_userq_fence_info *fence_info = NULL; struct drm_amdgpu_userq_wait *wait_info = data; struct drm_gem_object **gobj_write; struct drm_gem_object **gobj_read; struct dma_fence **fences = NULL; + u16 num_points, num_fences = 0; int r, i, rentry, wentry, cnt; struct drm_exec exec; - u64 num_fences = 0; - num_read_bo_handles = wait_info->num_read_bo_handles; + num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), sizeof(u32) * num_read_bo_handles); if (IS_ERR(bo_handles_read)) return PTR_ERR(bo_handles_read); - num_write_bo_handles = wait_info->num_write_bo_handles; + num_write_bo_handles = wait_info->num_bo_write_handles; bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), sizeof(u32) * num_write_bo_handles); if (IS_ERR(bo_handles_write)) { @@ -583,14 +583,14 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } num_syncobj = wait_info->num_syncobj_handles; - syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array), + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), sizeof(u32) * num_syncobj); if (IS_ERR(syncobj_handles)) { r = PTR_ERR(syncobj_handles); goto free_bo_handles_write; } - num_points = wait_info->num_points; + num_points = wait_info->num_syncobj_timeline_handles; timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), sizeof(u32) * num_points); if (IS_ERR(timeline_handles)) { @@ -858,7 +858,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, wait_info->num_fences = cnt; /* Copy userq fence info to user space */ - if (copy_to_user(u64_to_user_ptr(wait_info->userq_fence_info), + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), fence_info, wait_info->num_fences * sizeof(*fence_info))) { r = -EFAULT; goto free_fences; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6158496cc1d0d..72dc16dbca7fc 
100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -453,27 +453,17 @@ struct drm_amdgpu_userq_signal { * to retrieve the WPTR. */ __u32 queue_id; + __u32 pad; /** - * @flags: flags to indicate special function for userq fence creation. - * Unused for now. - */ - __u32 flags; - /** - * @syncobj_handles_array: An array of syncobj handles used by the userq fence - * creation IOCTL to install the created dma_fence object which can be - * utilized by userspace to explicitly synchronize GPU commands. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. */ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u64 num_syncobj_handles; - /** - * @syncobj_point: A given point on the timeline to be signaled. - * Unused for now. - */ - __u64 syncobj_point; /** * @bo_read_handles: The list of BO handles that the submitted user queue job * is using for read only. This will update BO fences in the kernel. @@ -485,20 +475,15 @@ struct drm_amdgpu_userq_signal { */ __u64 bo_write_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. 
*/ - __u32 num_write_bo_handles; - /** - * @bo_flags: flags to indicate BOs synchronize for READ or WRITE - */ - __u32 bo_flags; - __u32 pad; + __u32 num_bo_write_handles; }; struct drm_amdgpu_userq_fence_info { @@ -517,38 +502,18 @@ struct drm_amdgpu_userq_fence_info { struct drm_amdgpu_userq_wait { /** - * @waitq_id: Queue handle used to retrieve the queue information to store - * the fence driver references in the wait user queue structure. - */ - __u32 waitq_id; - /** - * @flags: flags to specify special function for userq wait information. - * Unused for now. - */ - __u32 flags; - /** - * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the - * matching fence wait info pair in @userq_fence_info. - */ - __u32 bo_wait_flags; - /** - * @num_points: A count that represents the number of timeline syncobj handles in - * syncobj_handles_array. - */ - __u32 num_points; - /** - * @syncobj_handles_array: An array of syncobj handles defined to get the - * fence wait information of every syncobj handles in the array. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. */ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** - * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the - * fence wait information of every timeline syncobj handles in the array. + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. */ - __u64 syncobj_timeline_handles; + __u64 syncobj_timeline_handles; /** - * @syncobj_timeline_points: An array of timeline syncobj points defined to get the - * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. 
*/ __u64 syncobj_timeline_points; /** @@ -561,32 +526,37 @@ struct drm_amdgpu_userq_wait { * job to get the va/value pairs. */ __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u32 num_syncobj_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. */ - __u32 num_write_bo_handles; - __u32 pad; - /** - * @userq_fence_info: An array of fence information (va and value) pair of each - * objects stored in @syncobj_handles_array and @bo_handles_array. - */ - __u64 userq_fence_info; + __u32 num_bo_write_handles; /** - * @num_fences: A count that represents the number of actual fences installed in - * each syncobj and bo handles. + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. 
*/ - __u64 num_fences; + __u64 out_fences; }; /* vm ioctl */ -- GitLab From aed7caf2d4fc659cacfd4358ae4893e2a50480f1 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 24 Oct 2024 17:07:42 +0200 Subject: [PATCH 179/899] drm/amdgpu: add get_gfx_shadow_info callback for gfx12 This callback gets the size and alignment requirements for the gfx shadow buffer for preemption. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index b4211e6e64860..4f9a73bae3324 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -910,6 +910,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, soc24_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info); + return 0; + } + + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -EINVAL; +} + static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter, .select_se_sh = 
&gfx_v12_0_select_se_sh, @@ -918,6 +946,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, + .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info, }; static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) -- GitLab From 90c448fef3120d79bd8031665213981c966dbaf4 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 30 Oct 2024 15:39:42 +0100 Subject: [PATCH 180/899] drm/amdgpu: add new AMDGPU_INFO subquery for userq objects This patch adds a new subquery (AMDGPU_INFO_UQ_FW_AREAS) in AMDGPU_INFO_IOCTL to get the size and alignment of shadow and csa objects from the FW setup. This information is required for the userqueue consumers. V2: Added Alex's suggestions and addressed review comments: - make this query IP specific (GFX/SDMA etc) - give a better title (AMDGPU_INFO_UQ_METADATA) - restructured the code as per sample code shared by Alex V3: Split the UAPI patch from shadow_size_fn modifications V4: Addressed review comments from UAPI review (Marek/Pierre-Eric) - Change the query name to AMDGPU_INFO_UQ_FW_AREAS - remove unused input parameter for AMDGPU_HW_IP* UAPI link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/400/ Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 40 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1fcf0ef063153..2e07776dd408d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -371,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, return 0; } +static int
amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_gfx *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->shadow_size = shadow.shadow_size; + meta->shadow_alignment = shadow.shadow_alignment; + meta->csa_size = shadow.csa_size; + meta->csa_alignment = shadow.csa_alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -1294,6 +1314,22 @@ out: return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; } + case AMDGPU_INFO_UQ_FW_AREAS: { + struct drm_amdgpu_info_uq_metadata meta_info = {}; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? 
-EFAULT : 0; + return 0; + default: + return -EINVAL; + } + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 72dc16dbca7fc..5dbd9037afe75 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1193,6 +1193,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_INFO_MAX_IBS 0x22 /* query last page fault info */ #define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -1469,6 +1471,27 @@ struct drm_amdgpu_info_hw_ip { __u32 ip_discovery_version; }; +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; @@ -1532,6 +1555,23 @@ struct drm_amdgpu_info_gpuvm_fault { __u32 vmhub; }; +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + /* * Supported GPU families */ -- GitLab From a640126fbda25275775ab50ab735d6144d43fe3b 
Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 13 Nov 2024 13:40:33 +0530 Subject: [PATCH 181/899] drm/amdgpu: add the argument description for gpu_addr Add argument description for the input argument gpu_addr for amdgpu_seq64_alloc. Fixes the warning raised by the compiler: drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c:168: warning: Function parameter or struct member 'gpu_addr' not described in 'amdgpu_seq64_alloc' Cc: Arunpravin Paneer Selvam Signed-off-by: Sunil Khatri Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 0defad71044c6..898d215a8d995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -156,6 +156,7 @@ error: * * @adev: amdgpu_device pointer * @va: VA to access the seq in process address space + * @gpu_addr: GPU address to access the seq * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. -- GitLab From fb796c308767606bbd6c2e1bf4fa9446ec68ce51 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 27 Aug 2024 15:44:43 +0530 Subject: [PATCH 182/899] drm/amdgpu: add gfx eviction fence helpers This patch adds basic eviction fence framework for the gfx buffers. The idea is to: - One eviction fence is created per gfx process, at kms_open. - This fence is attached to all the gem buffers created by this process. - This fence is detached from all the gem buffers at postclose_kms. This framework will be further used for usermode queues.
V2: Addressed review comments from Christian - keep fence_ctx and fence_seq directly in fpriv - eviction_fence should be dynamically allocated - do not save eviction fence instance in BO, there could be many such fences attached to one BO - use dma_resv_replace_fences() in detach V3: Addressed review comments from Christian - eviction fence create and destroy functions should be called only once from fpriv create/destroy - use dma_fence_put() in eviction_fence_destroy V4: Addressed review comments from Christian: - create a separate ev_fence_mgr structure - cleanup fence init part - do not add a domain for fence owner KGD V5: Addressed review comments from Christian: - drop the dma_fence_is_signaled check - use a local variable to access evf_mgr->ev_fence under the spin_lock() multiple places - remove the vm->is_compute_ctx check to attach gfx eviction fence, in gem_object_open V6: Addressed review comments from Christian: - drop the return value from eviction_fence_signal - reserve_fence should be the first thing inside the attach_eviction_fence function, also keep the resv_add_fence inside the lock - remove the unwanted ev_fence check inside detach function - fix wrong variable check in eviction_fence_init function - return the error value of eviction_fence_init to the caller, don't keep it void. - fail gem_object_open if attaching of eviction_fence fails - detach the eviction fence only when amdgpu_vm_is_bo_always_valid is not true.
V7: Addressed review comments from Christian: - Do not add a uq_mgr ptr in ev_fence, rather add evf_mgr V8: Move eviction fence enabling into separate patch for CI Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 144 ++++++++++++++++++ .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 63 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 + 6 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 1cc3079040897..0b0c8ec46516c 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o amdgpu_userq_fence.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index be10fbe293e4f..8c6c3c5451e94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -114,6 +114,7 @@ #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" #include "amdgpu_userqueue.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -490,7 +491,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -504,6 +504,10 @@ struct 
amdgpu_fpriv { struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 0000000000000..056798e2b0509 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include +#include "amdgpu.h" + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&evf_mgr->ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + /* Last unref of ev_fence */ + dma_fence_put(&evf_mgr->ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *ef; + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + 
DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) { + ef = dma_fence_get(&ev_fence->base); + dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP); + } + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + ev_fence = amdgpu_eviction_fence_create(evf_mgr); + if (!ev_fence) { + DRM_ERROR("Failed to craete eviction fence\n"); + return -ENOMEM; + } + + spin_lock(&evf_mgr->ev_fence_lock); + evf_mgr->ev_fence = ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 0000000000000..aba12a43f433f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 06171eab6e923..682b76cad359c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -293,6 +293,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2e07776dd408d..700442584f97f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1413,6 +1413,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, 
adev); r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev); @@ -1486,6 +1490,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); -- GitLab From 30e4d781385dda92fdee574b0a95094dfa143b52 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 3 Jun 2024 11:13:03 +0200 Subject: [PATCH 183/899] drm/amdgpu: add userqueue suspend/resume functions This patch adds userqueue suspend/resume functions at core MES V11 IP level. V2: use true/false for queue_active status (Christian) added Christian's R-B V3: reset/set queue status in mqd.create and mqd.destroy Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 33 +++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 5 +++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index fe4efe5ba6acc..ee0a757fcfa34 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -331,6 +331,7 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_ctx; } + queue->queue_active = true; return 0; free_ctx: @@ -354,9 +355,41 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + queue->queue_active = false; +} + +static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + if (queue->queue_active) { + mes_v11_0_userq_unmap(uq_mgr, queue); + queue->queue_active = false; + } + + return 0; +} + +static int 
mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + int ret; + + if (queue->queue_active) + return 0; + + ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop); + if (ret) { + DRM_ERROR("Failed to resume queue\n"); + return ret; + } + + queue->queue_active = true; + return 0; } const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { .mqd_create = mes_v11_0_userq_mqd_create, .mqd_destroy = mes_v11_0_userq_mqd_destroy, + .suspend = mes_v11_0_userq_suspend, + .resume = mes_v11_0_userq_resume, }; diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index b942f3f5ea353..ede0178e03320 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -37,6 +37,7 @@ struct amdgpu_userq_obj { struct amdgpu_usermode_queue { int queue_type; + uint8_t queue_active; uint64_t doorbell_handle; uint64_t doorbell_index; uint64_t flags; @@ -57,6 +58,10 @@ struct amdgpu_userq_funcs { struct amdgpu_usermode_queue *queue); void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *uq); + int (*suspend)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*resume)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); }; /* Usermode queues for gfx */ -- GitLab From b0328087c179f47ea6558c3b91b4487c5e10deda Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:59:49 +0100 Subject: [PATCH 184/899] drm/amdgpu: suspend gfx userqueues This patch adds suspend support for gfx userqueues. 
It typically does the following: - adds an enable_signaling function for the eviction fence, so that it can trigger the userqueue suspend, - adds a delayed work to handle suspending of the eviction_fence - adds a suspend function to handle suspending of userqueues which suspends all the queues under this userq manager and signals the eviction fence, - adds a function to replace the old eviction fence with a new one and attach it to each of the objects, - adds reference of userq manager in the eviction fence container so that it can be used in the suspend function. V2: Addressed Christian's review comments: - schedule suspend work immediately V4: Addressed Christian's review comments: - wait for pending uq fences before starting suspend, added queue->last_fence for the same - accommodate ev_fence_mgr into existing code - some bug fixes and NULL checks V5: Addressed Christian's review comments (gitlab) - Wait for eviction fence to get signaled in destroy, don't signal it - Wait for eviction fence to get signaled in replace fence, don't signal it V6: Addressed Christian's review comments - Do not destroy the old eviction fence until we have it replaced - Change the sequence of fence replacement sub-tasks - reusing the ev_fence delayed work for userqueue suspend as well (Shashank). 
V7: Addressed Christian's review comments - give evf_mgr as argument (instead of fpriv) to replace_fence() - save ptr to evf_mgr in ev_fence (instead of uq_mgr) - modify suspend_all_queues logic to reflect error properly - remove the garbage drm_exec_lock section in wait_for_signal - grab the userqueue mutex before starting the wait for fence - remove the unrelated gobj check from signal_ioctl V8: Added race condition fixes Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 127 ++++++++++++++++++ .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 4 + .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 37 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 110 +++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 9 ++ 5 files changed, 276 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 056798e2b0509..189afb8727750 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -22,8 +22,12 @@ * */ #include +#include #include "amdgpu.h" +#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) +#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) + static const char * amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) { @@ -39,10 +43,131 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) return ef->timeline_name; } +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) +{ + struct amdgpu_eviction_fence *old_ef, *new_ef; + struct drm_gem_object *obj; + unsigned long index; + int ret; + + /* + * Steps to replace eviction fence: + * * lock all objects in exec (caller) + * * create a new eviction fence + * * update new eviction fence 
in evf_mgr + * * attach the new eviction fence to BOs + * * release the old fence + * * unlock the objects (caller) + */ + new_ef = amdgpu_eviction_fence_create(evf_mgr); + if (!new_ef) { + DRM_ERROR("Failed to create new eviction fence\n"); + return -ENOMEM; + } + + /* Update the eviction fence now */ + spin_lock(&evf_mgr->ev_fence_lock); + old_ef = evf_mgr->ev_fence; + evf_mgr->ev_fence = new_ef; + spin_unlock(&evf_mgr->ev_fence_lock); + + /* Attach the new fence */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (!bo) + continue; + ret = amdgpu_eviction_fence_attach(evf_mgr, bo); + if (ret) { + DRM_ERROR("Failed to attch new eviction fence\n"); + goto free_err; + } + } + + /* Free old fence */ + dma_fence_put(&old_ef->base); + return 0; + +free_err: + kfree(new_ef); + return ret; +} + +static void +amdgpu_eviction_fence_suspend_worker(struct work_struct *work) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); + struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va *bo_va; + struct drm_exec exec; + bool userq_active = amdgpu_userqueue_active(uq_mgr); + int ret; + + + /* For userqueues, the fence replacement happens in resume path */ + if (userq_active) { + amdgpu_userqueue_suspend(uq_mgr); + return; + } + + /* Signal old eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr); + + /* Prepare the objects to replace eviction fence */ + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_drm; + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + struct amdgpu_bo *bo = bo_va->base.bo; + + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + 
drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_drm; + } + } + + /* Replace old eviction fence with new one */ + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + DRM_ERROR("Failed to replace eviction fence\n"); + +unlock_drm: + drm_exec_fini(&exec); +} + +static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr; + struct amdgpu_eviction_fence *ev_fence; + + if (!f) + return true; + + ev_fence = to_ev_fence(f); + evf_mgr = ev_fence->evf_mgr; + + schedule_delayed_work(&evf_mgr->suspend_work, 0); + return true; +} + static const struct dma_fence_ops amdgpu_eviction_fence_ops = { .use_64bit_seqno = true, .get_driver_name = amdgpu_eviction_fence_get_driver_name, .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, }; void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -140,5 +265,7 @@ int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) spin_lock(&evf_mgr->ev_fence_lock); evf_mgr->ev_fence = ev_fence; spin_unlock(&evf_mgr->ev_fence_lock); + + INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index aba12a43f433f..12f168ec3ef00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -37,6 +37,7 @@ struct amdgpu_eviction_fence_mgr { atomic_t ev_fence_seq; spinlock_t ev_fence_lock; struct amdgpu_eviction_fence *ev_fence; + struct delayed_work suspend_work; }; /* Eviction fence helper functions */ @@ -60,4 +61,7 @@ amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); +int +amdgpu_eviction_fence_replace_fence(struct 
amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 6157a540c9297..877cb17a14e9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -466,6 +466,16 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } + /* Save the fence to wait for during suspend */ + mutex_lock(&userq_mgr->userq_mutex); + + /* Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + mutex_unlock(&userq_mgr->userq_mutex); + } + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -473,30 +483,35 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; - } - - /*Retrieve the user queue */ - queue = idr_find(&userq_mgr->userq_idr, args->queue_id); - if (!queue) { - r = -ENOENT; - goto exec_fini; + } } r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } /* Create a new fence */ r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); for (i = 0; i < num_read_bo_handles; i++) { if (!gobj_read || !gobj_read[i]->resv) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index e946c6d1dd6be..43fff8869ac9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -60,6 +60,16 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, { struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + struct dma_fence *f = queue->last_fence; + int ret; + + if (f && !dma_fence_is_signaled(f)) { + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + DRM_ERROR("Timed out waiting for fence f=%p\n", f); + return; + } + } uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_userq_fence_driver_free(queue); @@ -67,6 +77,22 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } +int +amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + mutex_lock(&uq_mgr->userq_mutex); + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += queue->queue_active; + + mutex_unlock(&uq_mgr->userq_mutex); + return ret; +} + #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static struct amdgpu_usermode_queue * amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) @@ -202,6 +228,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); + uq_mgr->num_userqs--; mutex_unlock(&uq_mgr->userq_mutex); return 0; } @@ -277,6 +304,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } args->out.queue_id = qid; + uq_mgr->num_userqs++; unlock: mutex_unlock(&uq_mgr->userq_mutex); @@ -317,11 +345,93 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, } #endif +static int +amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct 
amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; + + /* Try to suspend all the queues in this process ctx */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += userq_funcs->suspend(uq_mgr, queue); + + if (ret) + DRM_ERROR("Couldn't suspend all the queues\n"); + return ret; +} + +static int +amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id, ret; + + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + struct dma_fence *f = queue->last_fence; + + if (!f || dma_fence_is_signaled(f)) + continue; + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + DRM_ERROR("Timed out waiting for fence f=%p\n", f); + return -ETIMEDOUT; + } + } + + return 0; +} + +void +amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) +{ + int ret; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + + mutex_lock(&uq_mgr->userq_mutex); + + /* Wait for any pending userqueue fence to signal */ + ret = amdgpu_userqueue_wait_for_signal(uq_mgr); + if (ret) { + DRM_ERROR("Not suspending userqueue, timeout waiting for work\n"); + goto unlock; + } + + ret = amdgpu_userqueue_suspend_all(uq_mgr); + if (ret) { + DRM_ERROR("Failed to evict userqueue\n"); + goto unlock; + } + + /* Signal current eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr); + + /* Cleanup old eviction fence entry */ + amdgpu_eviction_fence_destroy(evf_mgr); + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { + struct amdgpu_fpriv *fpriv; + mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; + userq_mgr->num_userqs = 0; + + fpriv = 
uq_mgr_to_fpriv(userq_mgr); + if (!fpriv->evf_mgr.ev_fence) { + DRM_ERROR("Eviction fence not initialized yet\n"); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index ede0178e03320..7781ee59653b9 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -27,6 +27,9 @@ #define AMDGPU_MAX_USERQ_COUNT 512 +#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) +#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) + struct amdgpu_mqd_prop; struct amdgpu_userq_obj { @@ -50,6 +53,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj wptr_obj; struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *last_fence; }; struct amdgpu_userq_funcs { @@ -69,6 +73,7 @@ struct amdgpu_userq_mgr { struct idr userq_idr; struct mutex userq_mutex; struct amdgpu_device *adev; + int num_userqs; }; int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -83,4 +88,8 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); + +void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr); + +int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); #endif -- GitLab From 44cfdf368fb72c03e4137709803d58288a22cb06 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 19:02:26 +0100 Subject: [PATCH 185/899] drm/amdgpu: resume gfx userqueues This patch adds support for userqueue resume. What it typically does is this: - adds a new delayed work for resuming all the queues. - schedules this delayed work from the suspend work. - validates the BOs and replaces the eviction fence before resuming all the queues running under this instance of userq manager. 
V2: Addressed Christian's review comments: - declare local variables like ret at the bottom. - lock all the object first, then start attaching the new fence. - dont replace old eviction fence, just attach new eviction fence. - no error logs for drm_exec_lock failures - no need to reserve bos after drm_exec_locked - schedule the resume worker immediately (not after 100 ms) - check for NULL BO (Arvind) V5: Rebased wrt changes in suspend patch - moved amdgpu_userqueue_validate_vm_bo in this patch - initialized ret in resume_all V6: Rebase V7: Addressed review comments from Christian - Do not use list_for_each_safe() with vm->invalidated, its not correct way V8: Fixed the race condition between suspend/close/fence Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 183 +++++++++++++++++- .../gpu/drm/amd/include/amdgpu_userqueue.h | 3 +- 3 files changed, 181 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index 12f168ec3ef00..787182bd1069d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -38,6 +38,7 @@ struct amdgpu_eviction_fence_mgr { spinlock_t ev_fence_lock; struct amdgpu_eviction_fence *ev_fence; struct delayed_work suspend_work; + uint8_t fd_closing; }; /* Eviction fence helper functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 43fff8869ac9a..57e502d014a12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -22,6 +22,7 @@ * */ +#include #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" @@ -216,6 +217,7 @@ amdgpu_userqueue_destroy(struct 
drm_file *filp, int queue_id) struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct amdgpu_usermode_queue *queue; + cancel_delayed_work(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); queue = amdgpu_userqueue_find(uq_mgr, queue_id); @@ -228,7 +230,6 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); - uq_mgr->num_userqs--; mutex_unlock(&uq_mgr->userq_mutex); return 0; } @@ -304,10 +305,18 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } args->out.queue_id = qid; - uq_mgr->num_userqs++; unlock: mutex_unlock(&uq_mgr->userq_mutex); + if (!r) { + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + */ + flush_delayed_work(&uq_mgr->resume_work); + } + return r; } @@ -345,6 +354,161 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, } #endif +static int +amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; + + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret = userq_funcs->resume(uq_mgr, queue); + + if (ret) + DRM_ERROR("Failed to resume all the queue\n"); + return ret; +} + +static int +amdgpu_userqueue_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +static int 
+amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va *bo_va; + struct ww_acquire_ctx *ticket; + struct drm_exec exec; + struct amdgpu_bo *bo; + struct dma_resv *resv; + bool clear, unlock; + int ret = 0; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) { + DRM_ERROR("Failed to lock PD\n"); + goto unlock_all; + } + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } + } + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + + /* Per VM BOs never need to bo cleared in the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + goto unlock_all; + spin_lock(&vm->status_lock); + } + + ticket = &exec.ticket; + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.base.resv; + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + ret = amdgpu_userqueue_validate_vm_bo(NULL, bo); + if (ret) { + DRM_ERROR("Failed to validate BO\n"); + goto unlock_all; + } + + /* Try to reserve the BO to avoid clearing its ptes */ + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; + /* Somebody else is 
using the BO right now */ + } else { + clear = true; + unlock = false; + } + + ret = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); + if (ret) + goto unlock_all; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + DRM_ERROR("Failed to replace eviction fence\n"); + +unlock_all: + drm_exec_fini(&exec); + return ret; +} + +static void amdgpu_userqueue_resume_worker(struct work_struct *work) +{ + struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + int ret; + + mutex_lock(&uq_mgr->userq_mutex); + + ret = amdgpu_userqueue_validate_bos(uq_mgr); + if (ret) { + DRM_ERROR("Failed to validate BOs to restore\n"); + goto unlock; + } + + ret = amdgpu_userqueue_resume_all(uq_mgr); + if (ret) { + DRM_ERROR("Failed to resume all queues\n"); + goto unlock; + } + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + static int amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) { @@ -393,6 +557,7 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + mutex_lock(&uq_mgr->userq_mutex); /* Wait for any pending userqueue fence to signal */ @@ -411,8 +576,14 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) /* Signal current eviction fence */ amdgpu_eviction_fence_signal(evf_mgr); - /* Cleanup old eviction fence entry */ - amdgpu_eviction_fence_destroy(evf_mgr); + if (evf_mgr->fd_closing) { + mutex_unlock(&uq_mgr->userq_mutex); + cancel_delayed_work(&uq_mgr->resume_work); + return; + } + + /* Schedule a resume work */ + schedule_delayed_work(&uq_mgr->resume_work, 0); unlock: mutex_unlock(&uq_mgr->userq_mutex); @@ -425,7 +596,6 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); 
userq_mgr->adev = adev; - userq_mgr->num_userqs = 0; fpriv = uq_mgr_to_fpriv(userq_mgr); if (!fpriv->evf_mgr.ev_fence) { @@ -433,6 +603,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi return -EINVAL; } + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker); return 0; } @@ -441,6 +612,8 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) uint32_t queue_id; struct amdgpu_usermode_queue *queue; + cancel_delayed_work(&userq_mgr->resume_work); + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 7781ee59653b9..2bf28f3454cb4 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -29,6 +29,7 @@ #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) +#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name) struct amdgpu_mqd_prop; @@ -73,7 +74,7 @@ struct amdgpu_userq_mgr { struct idr userq_idr; struct mutex userq_mutex; struct amdgpu_device *adev; - int num_userqs; + struct delayed_work resume_work; }; int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -- GitLab From b8e6d3f68c3bd1ac54492e210ece87475e7f862b Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:04:33 +0100 Subject: [PATCH 186/899] drm/amdgpu: handle eviction fence race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eviction process can get into a race condition between the eviction fence suspend work (which replaces the old fence with new) and kms_close (which destroys the fence and doesn't expect a new one). 
This patch: - adds a flag to indicate that fd is closing, so fence replacement is not required (evf_mgr->fd_closing) - adds a flush_work() during the ev_fence_destroy routine V2: Addressed review comments from Christian: - Do not use mutex to sync - Use flush_work and wait for suspend_work to be done V3: Fixed state machine for queue->active, which adds into race between suspend/resume and queue ops Cc: Alex Deucher Cc: Christian König Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 3 ++- drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 9 +++++---- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 189afb8727750..c22767a75348b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -117,6 +117,10 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) /* Signal old eviction fence */ amdgpu_eviction_fence_signal(evf_mgr); + /* Do not replace eviction fence is fd is getting closed */ + if (evf_mgr->fd_closing) + return; + /* Prepare the objects to replace eviction fence */ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -199,6 +203,9 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) { struct amdgpu_eviction_fence *ev_fence; + /* Wait for any pending work to execute */ + flush_delayed_work(&evf_mgr->suspend_work); + spin_lock(&evf_mgr->ev_fence_lock); ev_fence = evf_mgr->ev_fence; spin_unlock(&evf_mgr->ev_fence_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 700442584f97f..f8370da080388 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1490,10 +1490,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); - amdgpu_userq_mgr_fini(&fpriv->userq_mgr); if (pasid) amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 57e502d014a12..cba51bdf2e2c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -614,9 +614,10 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) cancel_delayed_work(&userq_mgr->resume_work); + mutex_lock(&userq_mgr->userq_mutex); idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); - idr_destroy(&userq_mgr->userq_idr); + mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index ee0a757fcfa34..b1b7bc47d39f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -139,6 +139,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, return r; } + queue->queue_active = true; DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); return 0; } @@ -160,6 +161,7 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + queue->queue_active = false; } static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, @@ -331,7 +333,6 @@ static int 
mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_ctx; } - queue->queue_active = true; return 0; free_ctx: @@ -350,12 +351,12 @@ static void mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - mes_v11_0_userq_unmap(uq_mgr, queue); - amdgpu_bo_unref(&queue->wptr_obj.obj); + if (queue->queue_active) + mes_v11_0_userq_unmap(uq_mgr, queue); + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); - queue->queue_active = false; } static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, -- GitLab From 825f82cf936aea7f2d25d47efd27f90ba90e4ee2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 13:58:23 -0400 Subject: [PATCH 187/899] drm/amdgpu: add some additional members to amdgpu_mqd_prop These are needed to make userqueue infrastructure generic. Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8c6c3c5451e94..ea5a1e6f01c33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -830,6 +830,9 @@ struct amdgpu_mqd_prop { uint32_t hqd_queue_priority; bool allow_tunneling; bool hqd_active; + uint64_t shadow_addr; + uint64_t gds_bkup_addr; + uint64_t csa_addr; }; struct amdgpu_mqd { -- GitLab From 7179439e34bbeef28e1b5083c365e7295b07f9bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:14:34 -0400 Subject: [PATCH 188/899] drm/amdgpu/gfx11: update mqd init for UQ Set the addresses for the UQ metadata. 
V2: Fix lower address (Shashank) V3: Restore lower_32_bits() for MQD addresses (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 933c4ad5eff9b..b8f75e1ba72ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4127,6 +4127,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + return 0; } -- GitLab From f2234816a31d0ec85ab63899762ec962ab682704 Mon Sep 17 00:00:00 2001 From: Amaranath Somalapuram Date: Wed, 27 Nov 2024 17:06:45 +0100 Subject: [PATCH 189/899] drm/amdgpu: fix IGT CI regression with eviction fence This patch fixes one of the regressions in eviction fence code with IGT tests. 
Reviewed-by: Shashank Sharma Signed-off-by: Amaranath Somalapuram Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index c22767a75348b..f7fb1674278c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -136,6 +136,9 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) if (!bo) continue; + if (vm != bo_va->base.vm) + continue; + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); if (unlikely(ret)) -- GitLab From ab328d9a7b614ba8c6f63096eae817dd6e7f72f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:12 -0400 Subject: [PATCH 190/899] drm/amdgpu/gfx12: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address mask (Shashank) V3: Use lower_32_bits() for MQD objects (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 4f9a73bae3324..8b409b4a1cb3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3027,6 +3027,12 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + return 0; } -- GitLab From d07a7fcb8d25724351b7f9c03739b814da343b72 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:31 -0400 
Subject: [PATCH 191/899] drm/amdgpu/sdma6: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address mask (Shashank) V3: Use lower_32_bits for MQD objects (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d4e1b2cd1f359..fe389379e890b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -892,6 +892,9 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } -- GitLab From 21926b5db8c1d4e82b47bbd484b085a0e604bfd6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:51 -0400 Subject: [PATCH 192/899] drm/amdgpu/sdma7: update mqd init for UQ Set the addresses for the UQ metadata. 
V2: Fix lower offset mask (Shashank) V3: Use lower_32_bits for MQD objects (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b2706221df994..3a0f38fc003e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -935,6 +935,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } -- GitLab From b965c5d87108add7941528569d7acdfabcd86b55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Oct 2024 15:16:12 +0200 Subject: [PATCH 193/899] drm/amdgpu/uq: remove gfx11 specifics from UQ setup This can all be handled in the IP-specific MQD init code.
V2: Removed setting of gds_va, which was removed during UAPI review (Shashank) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 83 ++++++++----------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index b1b7bc47d39f0..1ba6b91b03c4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -23,8 +23,6 @@ */ #include "amdgpu.h" #include "amdgpu_gfx.h" -#include "v11_structs.h" -#include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" #include "amdgpu_userq_fence.h" @@ -183,52 +181,6 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return r; } - /* Shadow, GDS and CSA objects come directly from userspace */ - if (mqd_user->ip_type == AMDGPU_HW_IP_GFX) { - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid GFX MQD\n"); - return -EINVAL; - } - - mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_gfx_v11)) { - DRM_ERROR("Failed to read user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC; - mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va); - - mqd->gds_bkup_base_lo = 0; - mqd->gds_bkup_base_hi = 0; - - mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; - mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); - kfree(mqd_gfx_v11); - } else if (mqd_user->ip_type == AMDGPU_HW_IP_DMA) { - struct v11_sdma_mqd *mqd = queue->mqd.cpu_ptr; - struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid 
SDMA MQD\n"); - return -EINVAL; - } - - mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_sdma_v11)) { - DRM_ERROR("Failed to read sdma user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - mqd->sdmax_rlcx_csa_addr_lo = mqd_sdma_v11->csa_va & 0xFFFFFFFC; - mqd->sdmax_rlcx_csa_addr_hi = upper_32_bits(mqd_sdma_v11->csa_va); - } - return 0; } @@ -300,6 +252,41 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; userq_props->hqd_active = false; kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + return -EINVAL; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + return -EINVAL; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); } queue->userq_prop = userq_props; -- GitLab From 79819d9a0ac38bf83ac712e00be2d53104895b84 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Nov 2024 15:45:19 +0100 Subject: [PATCH 194/899] drm/amdgpu/uq: make MES UQ setup generic 
Now that all of the IP specific code has been moved into the IP specific functions, we can make this code generic. V2: Fixed build errors and porting logics (Shashank) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +-- ...{mes_v11_0_userqueue.c => mes_userqueue.c} | 77 ++++++++++--------- ...{mes_v11_0_userqueue.h => mes_userqueue.h} | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +- 5 files changed, 51 insertions(+), 48 deletions(-) rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.c => mes_userqueue.c} (84%) rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.h => mes_userqueue.h} (91%) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 0b0c8ec46516c..513c4d64f5542 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -177,7 +177,7 @@ amdgpu-y += \ mes_v12_0.o \ # add GFX userqueue support -amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o +amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b8f75e1ba72ca..63b7c7bfcc4a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,7 +48,7 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 @@ -1623,8 +1623,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = 
&userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1640,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMD_USERQ_GFX - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c similarity index 84% rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 1ba6b91b03c4c..9c2fc8ae0d568 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -23,14 +23,15 @@ */ #include "amdgpu.h" #include "amdgpu_gfx.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" #include "amdgpu_userq_fence.h" +#include "v11_structs.h" #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE static int -mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) { int ret; @@ -58,7 +59,7 @@ err_reserve_bo_failed: } static int -mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { @@ -86,7 +87,7 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return -EINVAL; } - ret = mes_v11_0_map_gtt_bo_to_gart(wptr_obj->obj); + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); if (ret) { DRM_ERROR("Failed to map wptr bo to GART\n"); return ret; @@ -96,9 +97,9 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static int 
mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct amdgpu_mqd_prop *userq_props) +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct amdgpu_mqd_prop *userq_props) { struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_userq_obj *ctx = &queue->fw_obj; @@ -142,8 +143,8 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct mes_remove_queue_input queue_input; @@ -162,9 +163,9 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, queue->queue_active = false; } -static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct drm_amdgpu_userq_in *mqd_user) +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) { struct amdgpu_userq_obj *ctx = &queue->fw_obj; int r, size; @@ -184,7 +185,7 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) +static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) { struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; @@ -192,9 +193,9 @@ static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); } -static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, - struct drm_amdgpu_userq_in *args_in, - struct amdgpu_usermode_queue *queue) +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct 
amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; @@ -257,14 +258,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { DRM_ERROR("Invalid GFX MQD\n"); - return -EINVAL; + r = -EINVAL; + goto free_mqd; } mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); if (IS_ERR(mqd_gfx_v11)) { DRM_ERROR("Failed to read user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; + r = -ENOMEM; + goto free_mqd; } userq_props->shadow_addr = mqd_gfx_v11->shadow_va; @@ -275,14 +277,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { DRM_ERROR("Invalid SDMA MQD\n"); - return -EINVAL; + r = -EINVAL; + goto free_mqd; } mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); if (IS_ERR(mqd_sdma_v11)) { DRM_ERROR("Failed to read sdma user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; + r = -ENOMEM; + goto free_mqd; } userq_props->csa_addr = mqd_sdma_v11->csa_va; @@ -298,23 +301,23 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, } /* Create BO for FW operations */ - r = mes_v11_0_userq_create_ctx_space(uq_mgr, queue, mqd_user); + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); if (r) { DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); goto free_mqd; } - mes_v11_0_userq_set_fence_space(queue); + mes_userq_set_fence_space(queue); /* FW expects WPTR BOs to be mapped into GART */ - r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { DRM_ERROR("Failed to create WPTR mapping\n"); goto free_ctx; } /* Map userqueue into FW using MES */ - r = mes_v11_0_userq_map(uq_mgr, 
queue, userq_props); + r = mes_userq_map(uq_mgr, queue, userq_props); if (r) { DRM_ERROR("Failed to init MQD\n"); goto free_ctx; @@ -335,29 +338,29 @@ free_props: } static void -mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { if (queue->queue_active) - mes_v11_0_userq_unmap(uq_mgr, queue); + mes_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); } -static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, +static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { if (queue->queue_active) { - mes_v11_0_userq_unmap(uq_mgr, queue); + mes_userq_unmap(uq_mgr, queue); queue->queue_active = false; } return 0; } -static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, +static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { int ret; @@ -365,7 +368,7 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, if (queue->queue_active) return 0; - ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop); + ret = mes_userq_map(uq_mgr, queue, queue->userq_prop); if (ret) { DRM_ERROR("Failed to resume queue\n"); return ret; @@ -375,9 +378,9 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, return 0; } -const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { - .mqd_create = mes_v11_0_userq_mqd_create, - .mqd_destroy = mes_v11_0_userq_mqd_destroy, - .suspend = mes_v11_0_userq_suspend, - .resume = mes_v11_0_userq_resume, +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .suspend = mes_userq_suspend, + .resume = mes_userq_resume, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h 
similarity index 91% rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.h index 2c102361ca821..d0a521312ad4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -22,9 +22,9 @@ * */ -#ifndef MES_V11_0_USERQ_H -#define MES_V11_0_USERQ_H +#ifndef MES_USERQ_H +#define MES_USERQ_H #include "amdgpu_userqueue.h" -extern const struct amdgpu_userq_funcs userq_mes_v11_0_funcs; +extern const struct amdgpu_userq_funcs userq_mes_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index fe389379e890b..9bc3c7a35d185 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,7 +43,7 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -1381,7 +1381,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) -- GitLab From 988c9e704670a39ef2fd23f3eeb2d3b9c813dab1 Mon Sep 17 00:00:00 2001 From: Somalapuram Amaranath Date: Thu, 10 Oct 2024 20:08:06 +0200 Subject: [PATCH 195/899] drm/amdgpu: enable userqueue support for GFX12 This patch enables Usermode queue support across GFX, Compute and SDMA IPs on GFX12/SDMA7. It typically reuses Navi3X userqueue IP functions to create and destroy MQDs. 
v2: rebase on proposed changes (Alex) Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Somalapuram Amaranath Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 8b409b4a1cb3c..ee65f4057872a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -44,6 +44,7 @@ #include "gfx_v12_0.h" #include "nbif_v6_3_1.h" #include "mes_v12_0.h" +#include "mes_userqueue.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -1417,6 +1418,10 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; +#endif break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 3a0f38fc003e8..3514089acd942 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -42,6 +42,7 @@ #include "sdma_common.h" #include "sdma_v7_0.h" #include "v12_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -1381,6 +1382,11 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; +#endif + + return r; } -- GitLab From dd5a376cd234c3fff79667598a1e06300cf75026 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Tue, 26 Nov 2024 15:51:08 
+0100 Subject: [PATCH 196/899] drm/amdgpu: enable userqueue secure sem for GFX 12 - Add a field in struct amdgpu_mqd_prop for userqueue secure sem fence address since now we have a generic file for mes_userqueue.c - Add secure sem fence address mqd support to gfx12 into their corresponding init functions. - Enable secure semaphore IRQ handling V2: Address review comment from Alex: Use fence_address instead of fenceaddress (Shashank) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Signed-off-by: Somalapuram Amaranath Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 11 +-------- drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- drivers/gpu/drm/amd/include/v12_structs.h | 4 ++-- 6 files changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ea5a1e6f01c33..3cdb5f8325aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -833,6 +833,7 @@ struct amdgpu_mqd_prop { uint64_t shadow_addr; uint64_t gds_bkup_addr; uint64_t csa_addr; + uint64_t fence_address; }; struct amdgpu_mqd { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 63b7c7bfcc4a0..114284e65c7c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4134,6 +4134,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); 
return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index ee65f4057872a..1030f2985c7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -45,6 +45,7 @@ #include "nbif_v6_3_1.h" #include "mes_v12_0.h" #include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -3037,6 +3038,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } @@ -4819,25 +4822,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9c2fc8ae0d568..1dde099382ea2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -185,14 +185,6 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) -{ - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - - mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); - mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); -} - static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -231,6 +223,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; userq_props->use_doorbell = true; userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; @@ -307,8 +300,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } - mes_userq_set_fence_space(queue); - /* FW expects WPTR BOs to be mapped into GART */ r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index 797ce6a1e56eb..f6d4dab849eb8 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t fenceaddress_lo; // offset: 510 (0x1FE) - uint32_t fenceaddress_hi; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t 
fence_address_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5eabab611b022..5787c8a51b7cd 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -535,8 +535,8 @@ struct v12_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v12_sdma_mqd { -- GitLab From a242a3e4b5be22ffd85fb768d8c96df79b018136 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 11 Dec 2024 12:09:00 +0100 Subject: [PATCH 197/899] drm/amdgpu: simplify eviction fence suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The basic idea in this redesign is to add an eviction fence only in UQ resume path. When userqueue is not present, keep ev_fence as NULL Main changes are: - do not create the eviction fence during evf_mgr_init, keeping evf_mgr->ev_fence=NULL until UQ get active. - do not replace the ev_fence in evf_resume path, but replace it only in uq_resume path, so remove all the unnecessary code from ev_fence_resume. - add a new helper function (amdgpu_userqueue_ensure_ev_fence) which will do the following: - flush any pending uq_resume work, so that it could create an eviction_fence - if there is no pending uq_resume_work, add a uq_resume work and wait for it to execute so that we always have a valid ev_fence - call this helper function from two places, to ensure we have a valid ev_fence: - when a new uq is created - when a new uq completion fence is created v2: Worked on review comments by Christian. v3: Addressed few more review comments by Christian. 
v4: Move mutex lock outside of the amdgpu_userqueue_suspend() function (Christian). v5: squash in build fix (Alex) Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 78 ++++--------------- .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 3 +- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 24 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 68 +++++++++------- .../gpu/drm/amd/include/amdgpu_userqueue.h | 7 +- 5 files changed, 69 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index f7fb1674278c2..8358dc6b68ad3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -87,7 +87,8 @@ amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, } /* Free old fence */ - dma_fence_put(&old_ef->base); + if (old_ef) + dma_fence_put(&old_ef->base); return 0; free_err: @@ -101,58 +102,17 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; - struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_bo_va *bo_va; - struct drm_exec exec; - bool userq_active = amdgpu_userqueue_active(uq_mgr); - int ret; - - - /* For userqueues, the fence replacement happens in resume path */ - if (userq_active) { - amdgpu_userqueue_suspend(uq_mgr); - return; - } - - /* Signal old eviction fence */ - amdgpu_eviction_fence_signal(evf_mgr); - - /* Do not replace eviction fence is fd is getting closed */ - if (evf_mgr->fd_closing) - return; - - /* Prepare the objects to replace eviction fence */ - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { - 
ret = amdgpu_vm_lock_pd(vm, &exec, 2); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_drm; - - /* Lock the done list */ - list_for_each_entry(bo_va, &vm->done, base.vm_status) { - struct amdgpu_bo *bo = bo_va->base.bo; - - if (!bo) - continue; - - if (vm != bo_va->base.vm) - continue; + struct amdgpu_eviction_fence *ev_fence; - ret = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_drm; - } - } + mutex_lock(&uq_mgr->userq_mutex); + ev_fence = evf_mgr->ev_fence; + if (!ev_fence) + goto unlock; - /* Replace old eviction fence with new one */ - ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); - if (ret) - DRM_ERROR("Failed to replace eviction fence\n"); + amdgpu_userqueue_suspend(uq_mgr, ev_fence); -unlock_drm: - drm_exec_fini(&exec); +unlock: + mutex_unlock(&uq_mgr->userq_mutex); } static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) @@ -177,10 +137,11 @@ static const struct dma_fence_ops amdgpu_eviction_fence_ops = { .enable_signaling = amdgpu_eviction_fence_enable_signaling, }; -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence) { spin_lock(&evf_mgr->ev_fence_lock); - dma_fence_signal(&evf_mgr->ev_fence->base); + dma_fence_signal(&ev_fence->base); spin_unlock(&evf_mgr->ev_fence_lock); } @@ -244,6 +205,7 @@ int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP); } spin_unlock(&evf_mgr->ev_fence_lock); + return 0; } @@ -259,23 +221,11 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) { - struct amdgpu_eviction_fence *ev_fence; - /* This needs to be done one time per open */ atomic_set(&evf_mgr->ev_fence_seq, 0); 
evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); spin_lock_init(&evf_mgr->ev_fence_lock); - ev_fence = amdgpu_eviction_fence_create(evf_mgr); - if (!ev_fence) { - DRM_ERROR("Failed to craete eviction fence\n"); - return -ENOMEM; - } - - spin_lock(&evf_mgr->ev_fence_lock); - evf_mgr->ev_fence = ev_fence; - spin_unlock(&evf_mgr->ev_fence_lock); - INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index 787182bd1069d..fcd867b7147dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -60,7 +60,8 @@ int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); void -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence); int amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 877cb17a14e9a..20c36dc97c2ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -466,14 +466,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - /* Save the fence to wait for during suspend */ - mutex_lock(&userq_mgr->userq_mutex); - /* Retrieve the user queue */ queue = idr_find(&userq_mgr->userq_idr, args->queue_id); if (!queue) { r = -ENOENT; - mutex_unlock(&userq_mgr->userq_mutex); + goto put_gobj_write; } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, @@ -483,31 +480,26 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) 
{ - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } } r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } /* Create a new fence */ r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); dma_fence_put(queue->last_fence); queue->last_fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index cba51bdf2e2c6..c11fcdd604fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -101,6 +101,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) return idr_find(&uq_mgr->userq_idr, qid); } +void +amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + +retry: + /* Flush any pending resume work to create ev_fence */ + flush_delayed_work(&uq_mgr->resume_work); + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { + mutex_unlock(&uq_mgr->userq_mutex); + /* + * Looks like there was no pending resume work, + * add one now to create a valid eviction fence + */ + schedule_delayed_work(&uq_mgr->resume_work, 0); + goto retry; + } +} + int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj, int size) @@ -253,7 +278,14 @@ 
amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) return -EINVAL; } - mutex_lock(&uq_mgr->userq_mutex); + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + * + * This will also make sure we have a valid eviction fence ready to be used. + */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { @@ -308,14 +340,6 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) unlock: mutex_unlock(&uq_mgr->userq_mutex); - if (!r) { - /* - * There could be a situation that we are creating a new queue while - * the other queues under this UQ_mgr are suspended. So if there is any - * resume work pending, wait for it to get done. - */ - flush_delayed_work(&uq_mgr->resume_work); - } return r; } @@ -551,58 +575,44 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) } void -amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) +amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence) { int ret; struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; - - mutex_lock(&uq_mgr->userq_mutex); - - /* Wait for any pending userqueue fence to signal */ + /* Wait for any pending userqueue fence work to finish */ ret = amdgpu_userqueue_wait_for_signal(uq_mgr); if (ret) { DRM_ERROR("Not suspending userqueue, timeout waiting for work\n"); - goto unlock; + return; } ret = amdgpu_userqueue_suspend_all(uq_mgr); if (ret) { DRM_ERROR("Failed to evict userqueue\n"); - goto unlock; + return; } /* Signal current eviction fence */ - amdgpu_eviction_fence_signal(evf_mgr); + amdgpu_eviction_fence_signal(evf_mgr, ev_fence); if (evf_mgr->fd_closing) { - mutex_unlock(&uq_mgr->userq_mutex); 
cancel_delayed_work(&uq_mgr->resume_work); return; } /* Schedule a resume work */ schedule_delayed_work(&uq_mgr->resume_work, 0); - -unlock: - mutex_unlock(&uq_mgr->userq_mutex); } int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { - struct amdgpu_fpriv *fpriv; - mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; - fpriv = uq_mgr_to_fpriv(userq_mgr); - if (!fpriv->evf_mgr.ev_fence) { - DRM_ERROR("Eviction fence not initialized yet\n"); - return -EINVAL; - } - INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker); return 0; } diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 2bf28f3454cb4..e7e8d79b689d5 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -24,6 +24,7 @@ #ifndef AMDGPU_USERQUEUE_H_ #define AMDGPU_USERQUEUE_H_ +#include "amdgpu_eviction_fence.h" #define AMDGPU_MAX_USERQ_COUNT 512 @@ -90,7 +91,11 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); -void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr); +void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence); int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); + +void amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr); #endif -- GitLab From 31f7efcdca4d1caaa3a0babc33377e27e6f9b593 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:45:33 +0100 Subject: [PATCH 198/899] drm/amdgpu: enable eviction fence This patch enables attachment and detachment of eviction fences. 
This is just a fork of eviction fence enabling code from the first patch of the series so that the CI testing can happen on fully fledged code. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 8358dc6b68ad3..167951aee502d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -177,6 +177,8 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) if (!ev_fence) return; + dma_fence_wait(&ev_fence->base, false); + /* Last unref of ev_fence */ dma_fence_put(&evf_mgr->ev_fence->base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 682b76cad359c..6850a6954a716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -294,6 +294,13 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, else ++bo_va->ref_count; + /* attach gfx eviction fence */ + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); + if (r) { + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + return r; + } + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -344,6 +351,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); -- GitLab From 9ed335d9398475675e26fcf92d7cf956e5b8a605 Mon Sep 17 
00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 19:20:56 +0530 Subject: [PATCH 199/899] drm/amdgpu: Add mqd for userq compute queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add mqd for userq compute queue for gfx11/gfx12 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 4 ++++ drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- drivers/gpu/drm/amd/include/v12_structs.h | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 114284e65c7c6..d3b4018580c68 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4312,6 +4312,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 1030f2985c7e3..0e95c1cfeca63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3215,6 +3215,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index f6d4dab849eb8..3728389fc3bee 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -1118,8 +1118,8 @@ struct v11_compute_mqd { uint32_t reserved_443; 
// offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5787c8a51b7cd..03a35f8a65b08 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -1118,8 +1118,8 @@ struct v12_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) -- GitLab From c9e20cb005fdb6a727dc1a85d7192a35eeb11987 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 23:02:28 +0530 Subject: [PATCH 200/899] drm/amdgpu: Fix NULL ptr dereference issue for non userq fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the correct fences count variable [num_fences] in the fences array iteration to handle the userq / non-userq fences. v2:(Christian) - All fences in the array either come from some reservation object or drm_syncobj. If any of those are NULL then there is a bug somewhere else. 
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 20c36dc97c2ee..8a4d9495f9d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -817,7 +817,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, fences[num_fences++] = fence; } - for (i = 0, cnt = 0; i < wait_info->num_fences; i++) { + for (i = 0, cnt = 0; i < num_fences; i++) { struct amdgpu_userq_fence_driver *fence_drv; struct amdgpu_userq_fence *userq_fence; u32 index; -- GitLab From ed5fdc1fc282dbe7bdf4968fb9dafb4366114160 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 23:04:34 +0530 Subject: [PATCH 201/899] drm/amdgpu: Fix the use-after-free issue in wait IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xarray pointer which has the userqueue xarray structure reference should be cleared when the userqueue gets destroyed. Otherwise, we may access the freed xa memory and see the below warnings. warning 1: BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x7a/0xe0 [ +0.000044] Call Trace: [ +0.000017] [ +0.000016] dump_stack_lvl+0x6c/0x90 [ +0.000025] print_report+0xc4/0x5e0 [ +0.000025] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? kasan_complete_mode_report_info+0x60/0x1d0 [ +0.000030] ? _raw_spin_lock+0x7a/0xe0 [ +0.000023] kasan_report+0xdf/0x120 [ +0.000023] ? _raw_spin_lock+0x7a/0xe0 [ +0.000025] kasan_check_range+0xf7/0x1b0 [ +0.000025] __kasan_check_write+0x14/0x20 [ +0.000024] _raw_spin_lock+0x7a/0xe0 [ +0.000023] ? __pfx__raw_spin_lock+0x10/0x10 [ +0.000024] ? amdgpu_userq_wait_ioctl+0xac0/0x1f30 [amdgpu] [ +0.000442] amdgpu_userq_wait_ioctl+0x18fc/0x1f30 [amdgpu] [ +0.000428] ? 
__pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000424] ? __pfx_idr_alloc_u32+0x10/0x10 [ +0.000027] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? __kasan_check_write+0x14/0x20 [ +0.000025] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? idr_alloc+0x72/0xc0 [ +0.000023] ? srso_return_thunk+0x5/0x5f [ +0.000023] ? fput+0x1c/0x2f0 [ +0.000025] drm_ioctl_kernel+0x178/0x2f0 [drm] [ +0.000065] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000425] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ +0.000064] ? srso_return_thunk+0x5/0x5f [ +0.000023] ? __kasan_check_write+0x14/0x20 [ +0.000025] drm_ioctl+0x513/0xd20 [drm] [ +0.000058] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000428] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ +0.000061] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.000027] ? __count_memcg_events+0x11f/0x3a0 [ +0.000027] ? srso_return_thunk+0x5/0x5f [ +0.001040] ? srso_return_thunk+0x5/0x5f [ +0.000969] ? _raw_spin_unlock_irqrestore+0x27/0x50 [ +0.000966] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu] [ +0.001352] __x64_sys_ioctl+0x135/0x1b0 [ +0.000966] x64_sys_call+0x1205/0x20d0 [ +0.000968] do_syscall_64+0x4d/0x120 [ +0.000960] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000962] RIP: 0033:0x7f42af11a94f warning 2: WARNING: at lib/xarray.c:1849 __xa_alloc+0x13a/0x150 [ 366.491409] RIP: 0010:__xa_alloc+0x13a/0x150 [ 366.491434] Call Trace: [ 366.491437] [ 366.491440] ? show_regs+0x6d/0x80 [ 366.491445] ? __warn+0x91/0x140 [ 366.491450] ? __xa_alloc+0x13a/0x150 [ 366.491453] ? report_bug+0x1c9/0x1e0 [ 366.491459] ? handle_bug+0x63/0xa0 [ 366.491463] ? exc_invalid_op+0x1d/0x80 [ 366.491467] ? asm_exc_invalid_op+0x1f/0x30 [ 366.491476] ? __xa_alloc+0x13a/0x150 [ 366.491484] amdgpu_userq_wait_ioctl+0xe0e/0xfe0 [amdgpu] [ 366.491743] ? idr_alloc_u32+0x97/0xd0 [ 366.491749] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ 366.491912] drm_ioctl_kernel+0xae/0x100 [drm] [ 366.491942] drm_ioctl+0x2a1/0x500 [drm] [ 366.491961] ? 
__pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ 366.492127] ? srso_return_thunk+0x5/0x5f [ 366.492132] ? srso_return_thunk+0x5/0x5f [ 366.492135] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 366.492139] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 366.492288] __x64_sys_ioctl+0x99/0xd0 [ 366.492295] x64_sys_call+0x1209/0x20d0 [ 366.492299] do_syscall_64+0x51/0x120 [ 366.492303] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 366.492418] RIP: 0033:0x7f86f3b1a94f Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index c11fcdd604fc8..85baba323ba53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -73,6 +73,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, } uq_funcs->mqd_destroy(uq_mgr, queue); + queue->fence_drv->fence_drv_xa_ptr = NULL; amdgpu_userq_fence_driver_free(queue); idr_remove(&uq_mgr->userq_idr, queue_id); kfree(queue); -- GitLab From 91acb5d47b7cc9fe17a7d0034e5f5ac996633bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 9 Dec 2024 23:10:48 +0530 Subject: [PATCH 202/899] drm/amdgpu: Modify the MES process va end limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify the MES process va end limit to max pfn. 
Signed-off-by: Christian König Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 3 +-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index acdca3110c248..503add9386340 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -656,8 +656,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, adev->gmc.vram_start; queue_input.process_va_start = 0; - queue_input.process_va_end = - (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; queue_input.process_quantum = gang->process->process_quantum; queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; queue_input.gang_quantum = gang->gang_quantum; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 1dde099382ea2..9a6a5553bbc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -109,7 +109,7 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_va_start = 0; - queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; /* set process quantum to 10 ms and gang quantum to 1 ms as default */ queue_input.process_quantum = 100000; -- GitLab From 02521454f0552e289e6d95a2b55c8395285b0e01 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 12 Dec 2024 19:36:16 +0530 Subject: [PATCH 203/899] drm/amdgpu: Apply sign extension to seq64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply sign extension to seq64 va 
address. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 898d215a8d995..2de1a844282ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -45,7 +45,11 @@ */ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) { - return AMDGPU_VA_RESERVED_SEQ64_START(adev); + u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev); + + addr = amdgpu_gmc_sign_extend(addr); + + return addr; } /** @@ -88,7 +92,7 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - seq64_addr = amdgpu_seq64_get_va_base(adev); + seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, AMDGPU_PTE_READABLE); if (r) { -- GitLab From adba0929736a6a2d2780e8e6e4082e42e5ba025c Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 19 Dec 2024 19:43:54 +0530 Subject: [PATCH 204/899] drm/amdgpu: Fix Illegal opcode in command stream Error When an application closes, it triggers the drm_file_free function which subsequently releases all allocated buffer objects. Concurrently, the resume_worker thread will attempt to map the usermode queue. However, since the wptr buffer object has already been deallocated, this will result in an Illegal opcode error being raised in the command stream. Replace drm_release() with a new function amdgpu_drm_release(). This function will set the flag to prevent the scheduling of any new queue resume/map, stop all queues and then call drm_release(). V2: - Replace drm_release with amdgpu_drm_release(Christian).
Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 148f859c3b989..5c55f6e01b414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2851,6 +2851,20 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2902,7 +2916,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, -- GitLab From 8639d2f5ca27ca533e782cc8f8de62ea002f1833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 20 Dec 2024 13:44:23 +0100 Subject: [PATCH 205/899] drm/amdgpu: fix call to amdgpu_eviction_fence_detach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That needs to be done after grabbing the lock, not before. 
Signed-off-by: Christian König Acked-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6850a6954a716..b9b80b0b60cad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -351,9 +351,6 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); @@ -367,6 +364,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; -- GitLab From 3e37fcb57bdf794804bec9538d20673a5bf4bdd8 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Fri, 3 Jan 2025 19:02:59 +0530 Subject: [PATCH 206/899] drm/amdgpu: map doorbell for the requested userq Introduce a db_info structure to populate the doorbell information that is required to be mapped. Make the doorbell mapping function more generic by taking the parameters that vary based on IP and/or use case from the db_info structure.
v2 - Fix space alignment and checkpatch warnings(Shashank) Signed-off-by: Saleemkhan Jamadar Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 27 +++++++++++-------- .../gpu/drm/amd/include/amdgpu_userqueue.h | 12 +++++++++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 85baba323ba53..769154223e2db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -189,18 +189,17 @@ void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, amdgpu_bo_unref(&userq_obj->obj); } -static uint64_t +uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct drm_file *filp, - uint32_t doorbell_offset) + struct amdgpu_db_info *db_info, + struct drm_file *filp) { uint64_t index; struct drm_gem_object *gobj; - struct amdgpu_userq_obj *db_obj = &queue->db_obj; - int r; + struct amdgpu_userq_obj *db_obj = db_info->db_obj; + int r, db_size; - gobj = drm_gem_object_lookup(filp, queue->doorbell_handle); + gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); if (gobj == NULL) { DRM_ERROR("Can't find GEM object for doorbell\n"); return -EINVAL; @@ -222,8 +221,9 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } + db_size = sizeof(u64); index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, - doorbell_offset, sizeof(u64)); + db_info->doorbell_offset, db_size); DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index); amdgpu_bo_unreserve(db_obj->obj); return index; @@ -268,6 +268,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; + struct amdgpu_db_info db_info; uint64_t 
index; int qid, r = 0; @@ -302,19 +303,23 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } queue->doorbell_handle = args->in.doorbell_handle; - queue->doorbell_index = args->in.doorbell_offset; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + /* Convert relative doorbell offset into absolute doorbell index */ - index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, args->in.doorbell_offset); + index = amdgpu_userqueue_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { DRM_ERROR("Failed to get doorbell for queue\n"); kfree(queue); goto unlock; } - queue->doorbell_index = index; + queue->doorbell_index = index; xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, queue); if (r) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index e7e8d79b689d5..0f358f77f2d9b 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -78,6 +78,13 @@ struct amdgpu_userq_mgr { struct delayed_work resume_work; }; +struct amdgpu_db_info { + uint64_t doorbell_handle; + uint32_t queue_type; + uint32_t doorbell_offset; + struct amdgpu_userq_obj *db_obj; +}; + int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev); @@ -98,4 +105,9 @@ int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); + +uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file 
*filp); + #endif -- GitLab From 49cd3353dbea6bb5c5a9c3201852fc363a2f34ad Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Mon, 6 Jan 2025 12:50:50 +0530 Subject: [PATCH 207/899] drm/amdgpu: add db size and offset range for VCN and VPE VCN and VPE have different offset ranges, update the doorbell offset range respectively. Doorbell size for VCN and VPE is 32bit. v1 : add gfx switch case and fix checkpatch warnings (Shashank) Signed-off-by: Saleemkhan Jamadar Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 769154223e2db..2eac83ba8bdf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -221,7 +221,29 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } - db_size = sizeof(u64); + switch (db_info->queue_type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_COMPUTE: + case AMDGPU_HW_IP_DMA: + db_size = sizeof(u64); + break; + + case AMDGPU_HW_IP_VCN_ENC: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; + break; + + case AMDGPU_HW_IP_VPE: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; + break; + + default: + DRM_ERROR("[Usermode queues] IP %d not support\n", db_info->queue_type); + r = -EINVAL; + goto unpin_bo; + } + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, db_info->doorbell_offset, db_size); DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index); -- GitLab From 239a310b4942a81f5e87a442bf1bcb759494f0c1 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 1 Jan 2025 14:22:29 +0530 Subject: [PATCH 208/899] drm/amdgpu: Fix out-of-bounds issue in user fence MIME-Version: 1.0 Content-Type: text/plain;
charset=UTF-8 Content-Transfer-Encoding: 8bit Fix out-of-bounds issue in userq fence create when accessing the userq xa structure. Added a lock to protect the race condition. v2:(Christian) - Allocate memory with GFP_ATOMIC. v3: - Moved to 2 xa approach. v4:(Christian) - Lock the xa_for_each blocks and memory allocation part as well to make sure that xa is not modified in between the 2 xa_for_each blocks. BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000006] Call Trace: [ +0.000005] [ +0.000005] dump_stack_lvl+0x6c/0x90 [ +0.000011] print_report+0xc4/0x5e0 [ +0.000009] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0 [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000405] kasan_report+0xdf/0x120 [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000405] __asan_report_store8_noabort+0x17/0x20 [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu] [ +0.000408] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm] [ +0.000013] ? srso_return_thunk+0x5/0x5f [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu] [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000404] ? try_to_wake_up+0x165/0x1840 [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10 [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm] [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ +0.000043] ? __kasan_check_read+0x11/0x20 [ +0.000007] ? srso_return_thunk+0x5/0x5f [ +0.000007] ? __kasan_check_write+0x14/0x20 [ +0.000008] drm_ioctl+0x513/0xd20 [drm] [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ +0.000044] ? srso_return_thunk+0x5/0x5f [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100 [ +0.000007] ? 
__pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30 [ +0.000008] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? srso_return_thunk+0x5/0x5f [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50 [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu] [ +0.000388] __x64_sys_ioctl+0x135/0x1b0 [ +0.000009] x64_sys_call+0x1205/0x20d0 [ +0.000007] do_syscall_64+0x4d/0x120 [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000007] RIP: 0033:0x7f7c3d31a94f Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8a4d9495f9d78..567a5ffa77651 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -229,23 +229,25 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, unsigned long index, count = 0; int i = 0; + xa_lock(&userq->fence_drv_xa); xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) count++; userq_fence->fence_drv_array = kvmalloc_array(count, sizeof(struct amdgpu_userq_fence_driver *), - GFP_KERNEL); + GFP_ATOMIC); if (userq_fence->fence_drv_array) { xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { userq_fence->fence_drv_array[i] = stored_fence_drv; - xa_erase(&userq->fence_drv_xa, index); + __xa_erase(&userq->fence_drv_xa, index); i++; } } userq_fence->fence_drv_array_count = i; + xa_unlock(&userq->fence_drv_xa); } else { userq_fence->fence_drv_array = NULL; userq_fence->fence_drv_array_count = 0; -- GitLab From fc4a85c6b2cc1b0f4e682180165a4e629711701f Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 10 Feb 2025 22:17:28 +0530 Subject: [PATCH 209/899] drm/amdgpu: Modify the seq64 VM cache policy MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The seq64 VM cache policy should be set to UC (Uncached) to match with userqueue fence address kernel mapped memory's cache settings. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 2de1a844282ec..3939761be31c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; - u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -93,8 +93,10 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, } seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; + + va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - AMDGPU_PTE_READABLE); + va_flags); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); -- GitLab From f15d4e92f7d35128d6416c473c2eab24188bd7e8 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 27 Jan 2025 18:22:01 +0530 Subject: [PATCH 210/899] drm/amdgpu: Fix display freeze lockup error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A deadlock situation has arisen between the userq signal ioctl and the eviction fence. In this scenario, the function amdgpu_userq_signal_ioctl() has acquired a reservation lock on the read/write buffer object (BO) through drm_exec.
Subsequently, it calls amdgpu_userqueue_ensure_ev_fence(), which waits for the userq resume work. Meanwhile, the userq suspend worker has initiated the userq resume work (amdgpu_userqueue_resume_worker). This userq resume work attempts to validate the vm->done BO, leading to amdgpu_userqueue_validate_bos also attempting to take the reservation lock on the same write BO that is already locked by amdgpu_userq_signal_ioctl. As a result, the resume work becomes stalled, causing amdgpu_userqueue_ensure_ev_fence to remain in a waiting state. Call Trace: [ 242.836469] INFO: task gnome-shel:cs0:1288 blocked for more than 120 seconds. [ 242.836486] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4 [ 242.836491] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.836494] task:gnome-shel:cs0 state:D stack:0 pid:1288 tgid:1282 ppid:1180 flags:0x00000002 [ 242.836503] Call Trace: [ 242.836508] [ 242.836517] __schedule+0x3e0/0xb10 [ 242.836530] ? srso_return_thunk+0x5/0x5f [ 242.836541] schedule+0x31/0x120 [ 242.836546] schedule_timeout+0x150/0x160 [ 242.836551] ? srso_return_thunk+0x5/0x5f [ 242.836555] ? sysvec_call_function+0x69/0xd0 [ 242.836562] ? srso_return_thunk+0x5/0x5f [ 242.836567] ? preempt_count_add+0x7f/0xd0 [ 242.836577] __wait_for_common+0x91/0x180 [ 242.836582] ? __pfx_schedule_timeout+0x10/0x10 [ 242.836590] wait_for_completion+0x28/0x30 [ 242.836595] __flush_work+0x16c/0x290 [ 242.836602] ? __pfx_wq_barrier_func+0x10/0x10 [ 242.836611] flush_delayed_work+0x3a/0x60 [ 242.836621] amdgpu_userqueue_ensure_ev_fence+0x2d/0xb0 [amdgpu] [ 242.836966] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu] [ 242.837171] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.837365] drm_ioctl_kernel+0xae/0x100 [drm] [ 242.837398] drm_ioctl+0x2a1/0x500 [drm] [ 242.837420] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.837622] ? srso_return_thunk+0x5/0x5f [ 242.837627] ? srso_return_thunk+0x5/0x5f [ 242.837630] ?
_raw_spin_unlock_irqrestore+0x2b/0x50 [ 242.837635] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 242.837811] __x64_sys_ioctl+0x99/0xd0 [ 242.837820] x64_sys_call+0x1209/0x20d0 [ 242.837825] do_syscall_64+0x51/0x120 [ 242.837830] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 242.837835] RIP: 0033:0x7f2f33f1a94f [ 242.837838] RSP: 002b:00007f2f24ffea30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 242.837842] RAX: ffffffffffffffda RBX: 00007f2f24ffebd0 RCX: 00007f2f33f1a94f [ 242.837845] RDX: 00007f2f24ffebd0 RSI: 00000000c0306457 RDI: 000000000000000d [ 242.837847] RBP: 00007f2f24ffeab0 R08: 0000000000000000 R09: 0000000000000000 [ 242.837849] R10: 00007f2f24ffecd0 R11: 0000000000000246 R12: 00007f2f25000640 [ 242.837851] R13: 00000000c0306457 R14: 000000000000000d R15: 00007fff3b39c1e0 [ 242.837858] [ 242.837865] INFO: task Xwayland:cs0:1517 blocked for more than 120 seconds. [ 242.837869] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4 [ 242.837872] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.837874] task:Xwayland:cs0 state:D stack:0 pid:1517 tgid:1338 ppid:1282 flags:0x00004002 [ 242.837878] Call Trace: [ 242.837880] [ 242.837883] __schedule+0x3e0/0xb10 [ 242.837890] schedule+0x31/0x120 [ 242.837894] schedule_preempt_disabled+0x1c/0x30 [ 242.837897] __mutex_lock.constprop.0+0x386/0x6e0 [ 242.837902] ? srso_return_thunk+0x5/0x5f [ 242.837905] ? __timer_delete_sync+0x81/0xe0 [ 242.837911] __mutex_lock_slowpath+0x13/0x20 [ 242.837915] mutex_lock+0x3b/0x50 [ 242.837919] amdgpu_userqueue_ensure_ev_fence+0x35/0xb0 [amdgpu] [ 242.838138] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu] [ 242.838340] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.838531] drm_ioctl_kernel+0xae/0x100 [drm] [ 242.838559] drm_ioctl+0x2a1/0x500 [drm] [ 242.838580] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.838778] ? srso_return_thunk+0x5/0x5f [ 242.838783] ? srso_return_thunk+0x5/0x5f [ 242.838786] ? 
_raw_spin_unlock_irqrestore+0x2b/0x50 [ 242.838791] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 242.838967] __x64_sys_ioctl+0x99/0xd0 [ 242.838972] x64_sys_call+0x1209/0x20d0 [ 242.838975] do_syscall_64+0x51/0x120 [ 242.838979] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 242.838982] RIP: 0033:0x7f9118b1a94f [ 242.838985] RSP: 002b:00007f910cdff760 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 242.838989] RAX: ffffffffffffffda RBX: 00007f910cdff910 RCX: 00007f9118b1a94f [ 242.838991] RDX: 00007f910cdff910 RSI: 00000000c0306457 RDI: 000000000000000c [ 242.838993] RBP: 00007f910cdff7e0 R08: 0000000000000000 R09: 0000000000000001 [ 242.838995] R10: 00007f910cdff9d4 R11: 0000000000000246 R12: 00007f910ce00640 [ 242.838997] R13: 00000000c0306457 R14: 000000000000000c R15: 00007fff9dd11d10 [ 242.839004] v2: Addressed review comments from Christian. v3/v4: Addressed review comments from Christian. - Move drm_exec drm_exec loop after userq fence create. - cleanup the newly created userq fence in case of error. v5 - Addressed review comments from Christian. - Create a new amdgpu_userq_fence_alloc() function for allocation. - Calling dma_fence_put for cleanup procedure. - make amdgpu_userq_fence_create() function static. - drm_exec_init is called after mutex_unlock.
Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 71 ++++++++++++------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 567a5ffa77651..a4953d668972a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -197,11 +197,18 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - u64 seq, struct dma_fence **f) +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ +static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +{ + *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + return *userq_fence ? 
0 : -ENOMEM; +} + +static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *userq_fence, + u64 seq, struct dma_fence **f) { struct amdgpu_userq_fence_driver *fence_drv; - struct amdgpu_userq_fence *userq_fence; struct dma_fence *fence; unsigned long flags; @@ -209,10 +216,6 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, if (!fence_drv) return -EINVAL; - userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); - if (!userq_fence) - return -ENOMEM; - spin_lock_init(&userq_fence->lock); INIT_LIST_HEAD(&userq_fence->link); fence = &userq_fence->base; @@ -266,6 +269,7 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, return 0; } +#endif static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) { @@ -383,6 +387,11 @@ map_error: return r; } +static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) +{ + dma_fence_put(fence); +} + int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -392,6 +401,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_gem_object **gobj_write = NULL; struct drm_gem_object **gobj_read = NULL; struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *userq_fence; struct drm_syncobj **syncobj = NULL; u32 *bo_handles_write, num_write_bo_handles; u32 *syncobj_handles, num_syncobj_handles; @@ -475,6 +485,29 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, goto put_gobj_write; } + r = amdgpu_userq_fence_read_wptr(queue, &wptr); + if (r) + goto put_gobj_write; + + r = amdgpu_userq_fence_alloc(&userq_fence); + if (r) + goto put_gobj_write; + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); + 
kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + goto put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -482,31 +515,19 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } } - r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) - goto exec_fini; - - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) - goto exec_fini; - - /* We are here means UQ is active, make sure the eviction fence is valid */ - amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - mutex_unlock(&userq_mgr->userq_mutex); - for (i = 0; i < num_read_bo_handles; i++) { if (!gobj_read || !gobj_read[i]->resv) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index f1a90840ac1fd..f0a91cc028808 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -61,8 +61,7 @@ struct amdgpu_userq_fence_driver { int amdgpu_userq_fence_slab_init(void); void amdgpu_userq_fence_slab_fini(void); -int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - u64 seq, struct dma_fence **f); + void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct 
amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2eac83ba8bdf7..f1d4e29772a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -455,7 +455,7 @@ amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr) bool clear, unlock; int ret = 0; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = amdgpu_vm_lock_pd(vm, &exec, 2); drm_exec_retry_on_contention(&exec); -- GitLab From 29adc5c2dd7a0e8dba9aac38a3116df38220dc4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 16:08:02 -0500 Subject: [PATCH 211/899] drm/amdgpu/userq: fix hardcoded uq functions Use the IP type to look up the userq functions rather than hardcoding it. 
Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index f1d4e29772a53..0664e04828c07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -415,11 +415,11 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret = userq_funcs->resume(uq_mgr, queue); + } if (ret) DRM_ERROR("Failed to resume all the queue\n"); @@ -570,11 +570,11 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Try to suspend all the queues in this process ctx */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret += userq_funcs->suspend(uq_mgr, queue); + } if (ret) DRM_ERROR("Couldn't suspend all the queues\n"); -- GitLab From 2a060b3ae92ec1951a8a94bf64580ccb4fedf38c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 09:44:39 -0500 Subject: [PATCH 212/899] drm/amdgpu/userq: handle runtime pm Take a reference when we create a queue and drop it when we destroy the queue. We need to keep the device active while user queues are active. 
v2: squash in fix from Sunil v3: squash in fix from Prike Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9a6a5553bbc3f..b469b800119ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -26,6 +26,7 @@ #include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #include "v11_structs.h" +#include #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -287,6 +288,12 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, queue->userq_prop = userq_props; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue mqd create\n"); + goto deference_pm; + } + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); if (r) { DRM_ERROR("Failed to initialize MQD for userqueue\n"); @@ -321,6 +328,9 @@ free_ctx: free_mqd: amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); +deference_pm: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); free_props: kfree(userq_props); @@ -330,14 +340,19 @@ free_props: static void mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) + struct amdgpu_usermode_queue *queue) { + struct amdgpu_device *adev = uq_mgr->adev; + if (queue->queue_active) mes_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, -- GitLab From 
df85baa767ca39c0a28d56a5a02251844c122a09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:14:35 -0500 Subject: [PATCH 213/899] drm/amdgpu: return an error in the userq IOCTL when DRM_AMDGPU_NAVI3X_USERQ=n I'd swear this was already fixed, but I guess the patch never landed. Add it now. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 0664e04828c07..a78a3728f6b80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -402,7 +402,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - return 0; + return -ENOTSUPP; } #endif -- GitLab From c4f42c8d0b97c2f16a4fc90f77eb3c0315c4cf6b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:37:31 -0500 Subject: [PATCH 214/899] drm/amdgpu/Kconfig: fix wording of DRM_AMDGPU_NAVI3X_USERQ The feature is not navi3x specific at this point. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 5bb44ea80164a..b8f39377b24a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -97,13 +97,13 @@ config DRM_AMDGPU_WERROR Only enable this if you are warning code for amdgpu.ko. config DRM_AMDGPU_NAVI3X_USERQ - bool "Enable Navi 3x gfx usermode queues" + bool "Enable amdgpu usermode queues" depends on DRM_AMDGPU depends on BROKEN default n help Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute - workload submission. This feature is experimental and supported on Navi 3X only. 
+ workload submission. This feature is experimental and supported on GFX11+. source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" -- GitLab From f36e4876c8e1ea61c48f6048bfcfd540c7abef2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:45:37 -0500 Subject: [PATCH 215/899] drm/amdgpu/gfx11: fix config guard s/CONFIG_DRM_AMD_USERQ_GFX/CONFIG_DRM_AMDGPU_NAVI3X_USERQ/ Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d3b4018580c68..80af0b92d6b11 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1639,7 +1639,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; -#ifdef CONFIG_DRM_AMD_USERQ_GFX +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif -- GitLab From 5ca4095960a8a8b7f24f02af6e5ce7b284e04559 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:50:11 -0500 Subject: [PATCH 216/899] drm/amdgpu: add userq firmware version checks Currently disabled until the firmwares are officially released. 
Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 14 ++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 4 +++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 80af0b92d6b11..f98c9c8253f87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1623,8 +1623,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1643,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e95c1cfeca63..58f17d95b2f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1420,8 +1420,11 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef 
CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 9bc3c7a35d185..3aa4fec4d9e4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1381,7 +1381,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 3514089acd942..8e3aa4536e5a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1383,7 +1383,9 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif -- GitLab From 665de8c94792d095c9dbd0ef16b0532f546aa8f9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Feb 2025 16:46:52 -0500 Subject: [PATCH 217/899] drm/amdgpu/userq: remove BROKEN from config This can be enabled now. We have the firmware checks in place. 
Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index b8f39377b24a2..7b95221d2f3d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -99,7 +99,6 @@ config DRM_AMDGPU_WERROR config DRM_AMDGPU_NAVI3X_USERQ bool "Enable amdgpu usermode queues" depends on DRM_AMDGPU - depends on BROKEN default n help Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute -- GitLab From ecdb0b32e518a69a724d3cb7a6e5a4d2be2db8a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:55:57 -0500 Subject: [PATCH 218/899] drm/amdgpu/userq: move the header to amdgpu directory To align with other headers. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/{include => amdgpu}/amdgpu_userqueue.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename drivers/gpu/drm/amd/{include => amdgpu}/amdgpu_userqueue.h (100%) diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h similarity index 100% rename from drivers/gpu/drm/amd/include/amdgpu_userqueue.h rename to drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h -- GitLab From ad6c120f6888033b1eb198a7a15ed4c6355edf6d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 18 Feb 2025 18:56:25 +0530 Subject: [PATCH 219/899] drm/amdgpu: fix the memleak caused by fence not released MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encountering a taint issue during the unloading of gpu_sched due to the fence not being released/put. In this context, amdgpu_vm_clear_freed is responsible for creating a job to update the page table (PT). It allocates kmem_cache for drm_sched_fence and returns the finished fence associated with job->base.s_fence. 
In case of Usermode queue this finished fence is added to the timeline sync object through amdgpu_gem_update_bo_mapping, which is utilized by user space to ensure the completion of the PT update. [ 508.900587] ============================================================================= [ 508.900605] BUG drm_sched_fence (Tainted: G N): Objects remaining in drm_sched_fence on __kmem_cache_shutdown() [ 508.900617] ----------------------------------------------------------------------------- [ 508.900627] Slab 0xffffe0cc04548780 objects=32 used=2 fp=0xffff8ea81521f000 flags=0x17ffffc0000240(workingset|head|node=0|zone=2|lastcpupid=0x1fffff) [ 508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G N 6.12.0+ #1 [ 508.900651] Tainted: [N]=TEST [ 508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021 [ 508.900656] Call Trace: [ 508.900659] [ 508.900665] dump_stack_lvl+0x70/0x90 [ 508.900674] dump_stack+0x14/0x20 [ 508.900678] slab_err+0xcb/0x110 [ 508.900687] ? srso_return_thunk+0x5/0x5f [ 508.900692] ? try_to_grab_pending+0xd3/0x1d0 [ 508.900697] ? srso_return_thunk+0x5/0x5f [ 508.900701] ? mutex_lock+0x17/0x50 [ 508.900708] __kmem_cache_shutdown+0x144/0x2d0 [ 508.900713] ? flush_rcu_work+0x50/0x60 [ 508.900719] kmem_cache_destroy+0x46/0x1f0 [ 508.900728] drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched] [ 508.900736] __do_sys_delete_module.constprop.0+0x184/0x320 [ 508.900744] ? srso_return_thunk+0x5/0x5f [ 508.900747] ? debug_smp_processor_id+0x1b/0x30 [ 508.900754] __x64_sys_delete_module+0x16/0x20 [ 508.900758] x64_sys_call+0xdf/0x20d0 [ 508.900763] do_syscall_64+0x51/0x120 [ 508.900769] entry_SYSCALL_64_after_hwframe+0x76/0x7e v2: call dma_fence_put in amdgpu_gem_va_update_vm v3: Addressed review comments from Christian. - calling amdgpu_gem_update_timeline_node before switch. - putting a dma_fence in case of error or !timeline_syncobj. v4: Addressed review comments from Christian. 
Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Cc: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Le Ma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b9b80b0b60cad..f03fc3cf4d50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -934,6 +934,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -960,22 +968,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { - - r = amdgpu_gem_update_timeline_node(filp, - args->vm_timeline_syncobj_out, - args->vm_timeline_point, - &timeline_syncobj, - &timeline_chain); - fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - if (!r) + if (timeline_syncobj) amdgpu_gem_update_bo_mapping(filp, bo_va, - args->operation, - args->vm_timeline_point, - fence, timeline_syncobj, - timeline_chain); + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + } error: -- GitLab From 158bfbc72c5d7675039df540b120ccdd37bca5f0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:31:57 -0500 Subject: [PATCH 220/899] drm/amdgpu: validate user queue parameters Make sure these are set properly to ensure compatibility if we ever update the IOCTL interface. 
Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index a78a3728f6b80..a02614cbda36e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -380,12 +380,26 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: + if (args->in._pad) + return -EINVAL; r = amdgpu_userqueue_create(filp, args); if (r) DRM_ERROR("Failed to create usermode queue\n"); break; case AMDGPU_USERQ_OP_FREE: + if (args->in.ip_type || + args->in.doorbell_handle || + args->in.doorbell_offset || + args->in._pad || + args->in.queue_va || + args->in.queue_size || + args->in.rptr_va || + args->in.wptr_va || + args->in.wptr_va || + args->in.mqd || + args->in.mqd_size) + return -EINVAL; r = amdgpu_userqueue_destroy(filp, args->in.queue_id); if (r) DRM_ERROR("Failed to destroy usermode queue\n"); -- GitLab From cb17fff3a254c3beb02101c68b64066797ec9028 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 15:39:02 -0500 Subject: [PATCH 221/899] drm/amdgpu/mes: remove unused functions Leftover from the MES self tests that were removed previously. 
Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 800 ------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 41 -- 2 files changed, 841 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 503add9386340..1d355b0fd4b6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - int ip_type, uint64_t *doorbell_index) -{ - unsigned int offset, found; - struct amdgpu_mes *mes = &adev->mes; - - if (ip_type == AMDGPU_RING_TYPE_SDMA) - offset = adev->doorbell_index.sdma_engine[0]; - else - offset = 0; - - found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); - if (found >= mes->num_mes_dbs) { - DRM_WARN("No doorbell available\n"); - return -ENOSPC; - } - - set_bit(found, mes->doorbell_bitmap); - - /* Get the absolute doorbell index on BAR */ - *doorbell_index = mes->db_start_dw_offset + found * 2; - return 0; -} - -static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - uint32_t doorbell_index) -{ - unsigned int old, rel_index; - struct amdgpu_mes *mes = &adev->mes; - - /* Find the relative index of the doorbell in this object */ - rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; - old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); - WARN_ON(!old); -} - static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { int i; @@ -237,244 +201,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) mutex_destroy(&adev->mes.mutex_hidden); } -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); -} - -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm) -{ - 
struct amdgpu_mes_process *process; - int r; - - /* allocate the mes process buffer */ - process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); - if (!process) { - DRM_ERROR("no more memory to create mes process\n"); - return -ENOMEM; - } - - /* allocate the process context bo and map it */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_memory; - } - memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_ctx; - } - - INIT_LIST_HEAD(&process->gang_list); - process->vm = vm; - process->pasid = pasid; - process->process_quantum = adev->mes.default_process_quantum; - process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); -clean_up_memory: - kfree(process); - return r; -} - -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang, *tmp1; - struct amdgpu_mes_queue *queue, *tmp2; - struct mes_remove_queue_input queue_input; - unsigned long flags; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_WARN("pasid %d doesn't exist\n", pasid); - amdgpu_mes_unlock(&adev->mes); - return; - } - - /* Remove all queues from hardware */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, - &queue_input); - if (r) - DRM_WARN("failed to remove hardware queue\n"); - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - } - - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); - - /* free all memory allocated by the process */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - amdgpu_mes_queue_free_mqd(queue); - list_del(&queue->list); - kfree(queue); - } - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); - kfree(gang); - - } - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - kfree(process); -} - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; - int r; - - /* allocate the mes gang buffer */ - gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); - if (!gang) { - return -ENOMEM; - } - - /* allocate the gang context bo and map it to cpu space */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, - 
AMDGPU_GEM_DOMAIN_GTT, - &gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_mem; - } - memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - r = -EINVAL; - goto clean_up_ctx; - } - - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to allocate idr for gang\n"); - goto clean_up_ctx; - } - - gang->gang_id = r; - *gang_id = r; - - INIT_LIST_HEAD(&gang->queue_list); - gang->process = process; - gang->priority = gprops->priority; - gang->gang_quantum = gprops->gang_quantum ? - gprops->gang_quantum : adev->mes.default_gang_quantum; - gang->global_priority_level = gprops->global_priority_level; - gang->inprocess_gang_priority = gprops->inprocess_gang_priority; - list_add_tail(&gang->list, &process->gang_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); -clean_up_mem: - kfree(gang); - return r; -} - -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) -{ - struct amdgpu_mes_gang *gang; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - - if (!list_empty(&gang->queue_list)) { - DRM_ERROR("queue list is not empty\n"); - amdgpu_mes_unlock(&adev->mes); - return -EBUSY; - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - list_del(&gang->list); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - - kfree(gang); - - return 0; -} - int amdgpu_mes_suspend(struct amdgpu_device *adev) { struct mes_suspend_gang_input input; @@ -523,241 +249,6 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) return r; } -static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - u32 mqd_size = mqd_mgr->mqd_size; - int r; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &q->mqd_obj, - &q->mqd_gpu_addr, &q->mqd_cpu_ptr); - if (r) { - dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); - return r; - } - memset(q->mqd_cpu_ptr, 0, mqd_size); - - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - struct amdgpu_mqd_prop mqd_prop = {0}; - - mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; - mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; - mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; - mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; - mqd_prop.queue_size = p->queue_size; - mqd_prop.use_doorbell = true; - 
mqd_prop.doorbell_index = p->doorbell_off; - mqd_prop.eop_gpu_addr = p->eop_gpu_addr; - mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; - mqd_prop.hqd_queue_priority = p->hqd_queue_priority; - mqd_prop.hqd_active = false; - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); - } - - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } - - amdgpu_bo_unreserve(q->mqd_obj); -} - -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id) -{ - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_add_queue_input queue_input; - unsigned long flags; - int r; - - memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); - - /* allocate the mes queue buffer */ - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); - if (!queue) { - DRM_ERROR("Failed to allocate memory for queue\n"); - return -ENOMEM; - } - - /* Allocate the queue mqd */ - r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); - if (r) - goto clean_up_memory; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - r = -EINVAL; - goto clean_up_mqd; - } - - /* add the mes gang to idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, - GFP_ATOMIC); - if (r < 0) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_mqd; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - *queue_id = queue->queue_id = r; - - /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, - qprops->queue_type, - &qprops->doorbell_off); - if (r) - goto clean_up_queue_id; - - /* initialize the queue mqd */ - amdgpu_mes_queue_init_mqd(adev, queue, qprops); - - /* add hw queue to mes */ - queue_input.process_id = gang->process->pasid; - - queue_input.page_table_base_addr = - adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - - adev->gmc.vram_start; - - queue_input.process_va_start = 0; - queue_input.process_va_end = adev->vm_manager.max_pfn - 1; - queue_input.process_quantum = gang->process->process_quantum; - queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; - queue_input.gang_quantum = gang->gang_quantum; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; - queue_input.gang_global_priority_level = gang->global_priority_level; - queue_input.doorbell_offset = qprops->doorbell_off; - queue_input.mqd_addr = queue->mqd_gpu_addr; - queue_input.wptr_addr = qprops->wptr_gpu_addr; - queue_input.wptr_mc_addr = qprops->wptr_mc_addr; - queue_input.queue_type = qprops->queue_type; - queue_input.paging = qprops->paging; - queue_input.is_kfd_process = 0; - - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - if (r) { - DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", - qprops->doorbell_off); - 
goto clean_up_doorbell; - } - - DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " - "queue type=%d, doorbell=0x%llx\n", - gang->process->pasid, gang_id, qprops->queue_type, - qprops->doorbell_off); - - queue->ring = qprops->ring; - queue->doorbell_off = qprops->doorbell_off; - queue->wptr_gpu_addr = qprops->wptr_gpu_addr; - queue->queue_type = qprops->queue_type; - queue->paging = qprops->paging; - queue->gang = gang; - queue->ring->mqd_ptr = queue->mqd_cpu_ptr; - list_add_tail(&queue->list, &gang->queue_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); -clean_up_queue_id: - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); -clean_up_mqd: - amdgpu_mes_unlock(&adev->mes); - amdgpu_mes_queue_free_mqd(queue); -clean_up_memory: - kfree(queue); - return r; -} - -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_remove_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - - idr_remove(&adev->mes.queue_id_idr, queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to remove hardware queue, queue id = %d\n", - queue_id); - - list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_mes_queue_free_mqd(queue); - kfree(queue); - return 0; -} - int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) { unsigned long flags; @@ -1071,25 +562,6 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, return r; } -static void -amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_mes_queue_properties *props) -{ - props->queue_type = ring->funcs->type; - props->hqd_base_gpu_addr = ring->gpu_addr; - props->rptr_gpu_addr = ring->rptr_gpu_addr; - props->wptr_gpu_addr = ring->wptr_gpu_addr; - props->wptr_mc_addr = - ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; - props->queue_size = ring->ring_size; - props->eop_gpu_addr = ring->eop_gpu_addr; - props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - props->paging = false; - props->ring = ring; -} - #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ do { \ if (id_offs < 
AMDGPU_MES_CTX_MAX_OFFS) \ @@ -1126,284 +598,12 @@ int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) return -EINVAL; } -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang *gang; - struct amdgpu_mes_queue_properties qprops = {0}; - int r, queue_id, pasid; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - pasid = gang->process->pasid; - - ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); - if (!ring) { - amdgpu_mes_unlock(&adev->mes); - return -ENOMEM; - } - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->is_mes_queue = true; - ring->mes_ctx = ctx_data; - ring->idx = idx; - ring->no_scheduler = true; - - if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { - int offset = offsetof(struct amdgpu_mes_ctx_meta_data, - compute[ring->idx].mec_hpd); - ring->eop_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } - - switch (queue_type) { - case AMDGPU_RING_TYPE_GFX: - ring->funcs = adev->gfx.gfx_ring[0].funcs; - ring->me = adev->gfx.gfx_ring[0].me; - ring->pipe = adev->gfx.gfx_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_COMPUTE: - ring->funcs = adev->gfx.compute_ring[0].funcs; - ring->me = adev->gfx.compute_ring[0].me; - ring->pipe = adev->gfx.compute_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_SDMA: - ring->funcs = adev->sdma.instance[0].ring.funcs; - break; - default: - BUG(); - } - - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) { - amdgpu_mes_unlock(&adev->mes); - goto clean_up_memory; - } - - amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); - - 
dma_fence_wait(gang->process->vm->last_update, false); - dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); - amdgpu_mes_unlock(&adev->mes); - - r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); - if (r) - goto clean_up_ring; - - ring->hw_queue_id = queue_id; - ring->doorbell_index = qprops.doorbell_off; - - if (queue_type == AMDGPU_RING_TYPE_GFX) - sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, - queue_id); - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, - queue_id); - else - BUG(); - - *out = ring; - return 0; - -clean_up_ring: - amdgpu_ring_fini(ring); -clean_up_memory: - kfree(ring); - return r; -} - -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring) - return; - - amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - del_timer_sync(&ring->fence_drv.fallback_timer); - amdgpu_ring_fini(ring); - kfree(ring); -} - uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio) { return adev->mes.aggregated_doorbells[prio]; } -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - int r; - - r = amdgpu_bo_create_kernel(adev, - sizeof(struct amdgpu_mes_ctx_meta_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); - return r; - } - - if (!ctx_data->meta_data_obj) - return -ENOMEM; - - memset(ctx_data->meta_data_ptr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data)); - - return 0; -} - -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) -{ - if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - 
&ctx_data->meta_data_ptr); -} - -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va; - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); - if (!bo_va) { - DRM_ERROR("failed to create bo_va for meta data BO\n"); - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data), - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error_del_bo_va; - } - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) { - DRM_ERROR("failed to update pdes on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - ctx_data->meta_data_va = bo_va; - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, bo_va); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; - struct amdgpu_bo *bo = ctx_data->meta_data_obj; - 
struct amdgpu_vm *vm = bo_va->base.vm; - struct dma_fence *fence; - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - amdgpu_vm_bo_del(adev, bo_va); - if (!amdgpu_vm_ready(vm)) - goto out_unlock; - - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); - if (r) - goto out_unlock; - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - - r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (r || !fence) - goto out_unlock; - - dma_fence_wait(fence, false); - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - -out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - drm_exec_fini(&exec); - - return r; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 78362a8382122..be3390d263012 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -149,19 +149,6 @@ struct amdgpu_mes { }; -struct amdgpu_mes_process { - int pasid; - struct amdgpu_vm *vm; - uint64_t pd_gpu_addr; - struct amdgpu_bo *proc_ctx_bo; - uint64_t proc_ctx_gpu_addr; - void *proc_ctx_cpu_ptr; - uint64_t process_quantum; - struct list_head gang_list; - uint32_t doorbell_index; - struct mutex doorbell_lock; -}; - struct amdgpu_mes_gang { int gang_id; int priority; @@ -404,22 +391,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); void amdgpu_mes_fini(struct amdgpu_device *adev); -int 
amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm); -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid); - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id); -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id); - int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id); -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, int me_id, int pipe_id, int queue_id, int vmid); @@ -451,25 +425,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, bool trap_en); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr); -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out); -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* -- GitLab From 4220d2c7c41b8ea3fd154dc5678b05575653cba0 Mon Sep 17 00:00:00 
2001 From: Alex Deucher Date: Wed, 12 Mar 2025 13:47:33 -0400 Subject: [PATCH 222/899] drm/amdgpu: remove is_mes_queue flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was leftover from MES bring up when we had MES user queues in the kernel. It's no longer used so remove it. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 112 ++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 14 -- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 84 +++--------- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 143 ++++--------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 67 ++------- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 - drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 166 +++++++---------------- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 83 ++++-------- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 83 ++++-------- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 164 +++++++--------------- 15 files changed, 259 insertions(+), 697 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec602209..802743efa3b39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow = false; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
index 59acdbfe28d87..426834806fbf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. @@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, 
&ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->cached_rptr = 0; /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); - - } else if (ring->ring_obj == NULL) { + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; 
ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b663852237..615c3d5c5a8d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -301,11 +301,6 @@ struct amdgpu_ring { unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ @@ -435,15 +430,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? 
\ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 807da45958934..c3c6f03190c89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -76,22 +76,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e943..aa65d64fb15c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -817,7 +817,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f98c9c8253f87..23f4d097fa8cb 
100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -612,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -665,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -5757,10 +5740,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? 
true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5780,10 +5759,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5841,8 +5816,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5870,10 +5844,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6102,28 +6073,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - 
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 58f17d95b2f5b..1c7022103c638 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -506,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - 
cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -559,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -4252,45 +4235,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + 
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4315,42 +4270,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t 
*)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4397,10 +4324,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4420,10 +4343,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4463,8 +4382,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4492,10 +4410,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6125abddbb9d..e62c77b3934a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5466,16 +5466,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5498,16 +5490,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - 
amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5697,19 +5681,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5795,28 +5769,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, 
offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fcf..8ae4c031162bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84e..5c91d4445418c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d96..6e15cff6d5480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e23..e1348b6d9c6a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -369,67 +369,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, 
ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +544,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -1046,33 +1013,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1041,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1053,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } 
@@ -1124,38 +1076,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1121,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1132,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c13..964f12afac9e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -394,11 +394,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -903,33 +901,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +929,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = 
le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +941,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +964,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1008,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if 
(ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1019,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3aa4fec4d9e4a..00dd7bfff01a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -377,11 +377,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -921,33 +919,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { 
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -960,10 +947,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -975,8 +959,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -999,37 +982,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - 
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1057,10 +1026,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1071,8 +1037,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 8e3aa4536e5a6..99744728f5cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -205,66 +205,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - 
*wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, 
sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -408,11 +381,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -965,33 +936,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1004,10 +964,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); 
if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1019,8 +976,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1043,37 +999,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1101,10 +1043,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = 
le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1115,8 +1054,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } -- GitLab From 9e2bbba1d516b52c2ebc9875144f544025f9d5ef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 12:31:46 -0500 Subject: [PATCH 223/899] drm/amdgpu/mes: centralize gfx_hqd mask management Move it to amdgpu_mes to align with the compute and sdma hqd masks. No functional change. v2: rebase on new changes v3: misc optimizations Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 16 +++------------- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 15 +++------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1d355b0fd4b6b..016af4e9c35fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -108,6 +108,27 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { + if (i >= adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me) + break; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(12, 0, 0)) + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + adev->mes.gfx_hqd_mask[i] = 0xFE; + else + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. 
+ * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + */ + adev->mes.gfx_hqd_mask[i] = 0x2; + } + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) break; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ccc19a40f03d4..06b51867c9aac 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -669,18 +669,6 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, offsetof(union MESAPI__MISC, api_status)); } -static void mes_v11_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) -{ - /* - * GFX pipe 0 queue 0 is being used by Kernel queue. - * Set GFX pipe 0 queue 1 for MES scheduling - * mask = 10b - * GFX pipe 1 can't be used for MES due to HW limitation. - */ - pkt->gfx_hqd_mask[0] = 0x2; - pkt->gfx_hqd_mask[1] = 0; -} - static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) { int i; @@ -705,7 +693,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - mes_v11_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 801928555effb..8892858cfd9ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -694,17 +694,6 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static void mes_v12_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) -{ - /* - * GFX V12 has only one GFX pipe, but 8 queues in it. - * GFX pipe 0 queue 0 is being used by Kernel queue. 
- * Set GFX pipe 0 queue 1-7 for MES scheduling - * mask = 1111 1110b - */ - pkt->gfx_hqd_mask[0] = 0xFE; -} - static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; @@ -727,7 +716,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - mes_v12_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = -- GitLab From b6f190e6236af268e504d8ba62ab89b8ba5a1674 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 10:18:58 -0400 Subject: [PATCH 224/899] drm/amdgpu/mes: warn on unexpected pipe numbers Warn if the number of pipes exceeds what the MES supports. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 016af4e9c35fa..c52071841226f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -90,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { - int i, r; + int i, r, num_pipes; adev->mes.adev = adev; @@ -108,8 +108,13 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; + num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; + if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) + dev_warn(adev->dev, "more gfx pipes than supported by MES! 
(%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_GFX_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { - if (i >= adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me) + if (i >= num_pipes) break; if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) @@ -129,14 +134,24 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = 0x2; } + num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; + if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) + dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + if (i >= num_pipes) break; adev->mes.compute_hqd_mask[i] = 0xc; } + num_pipes = adev->sdma.num_instances; + if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) + dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (i >= adev->sdma.num_instances) + if (i >= num_pipes) break; adev->mes.sdma_hqd_mask[i] = 0xfc; } -- GitLab From 32bd8b3ea7071ca96d4adcd0817046e09c5df415 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 18 Mar 2025 18:45:40 +0530 Subject: [PATCH 225/899] drm/amdgpu: Fix display freezing issue when resizing apps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The display is freezing because the amdgpu_userq_wait_ioctl() is waiting for a non-user queue fence(specifically, the PT update fence). RootCause: The resume_work is initiated by both amdgpu_userq_suspend and amdgpu_userqueue_ensure_ev_fence at same time. The amdgpu_userq_suspend signals a dma-fence and subsequently triggers the resume_work, which is intended to replace the existing fence by creating new dma-fence. 
However, following this, the amdgpu_userqueue_ensure_ev_fence schedules another resume_work that generates a new dma-fence, thereby replacing the one created by amdgpu_userq_suspend. Consequently, the original fence will never be signaled. Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Cc: Sunil Khatri Reviewed-by: Christian König Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 167951aee502d..0075469550b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -52,6 +52,9 @@ amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, unsigned long index; int ret; + if (evf_mgr->ev_fence && + !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) + return 0; /* * Steps to replace eviction fence: * * lock all objects in exec (caller) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index a02614cbda36e..beae931152a3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -555,8 +555,11 @@ unlock_all: static void amdgpu_userqueue_resume_worker(struct work_struct *work) { struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); int ret; + flush_work(&fpriv->evf_mgr.suspend_work.work); + mutex_lock(&uq_mgr->userq_mutex); ret = amdgpu_userqueue_validate_bos(uq_mgr); -- GitLab From 9983ed969365329e22f3ea00838a6ad4afb65539 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 12:09:12 -0400 Subject: [PATCH 226/899] drm/amdgpu/gfx11: clean up and consolidate sw_init With the ME 
details fixed, we can now consolidate this state. Also split out the userq setup into a separate switch statement so that we can set them per IP version when the firmwares are ready. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 33 ++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 23f4d097fa8cb..29793caa64661 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1597,14 +1597,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0) { @@ -1619,12 +1640,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 2; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - 
adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0) { @@ -1634,12 +1649,6 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) #endif break; default: - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 8; break; } -- GitLab From a4a3373da2255c3149fec8fc423653aac2e570a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 12:21:22 -0400 Subject: [PATCH 227/899] drm/amdgpu/gfx12: split userq setup to a separate switch Add a separate switch statement for the userq callback assignment so that we can assign the callbacks for each asic as the firmware becomes available. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 1c7022103c638..bbc6349e5270e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1402,6 +1402,20 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ if (0) { @@ -1411,12 +1425,6 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) #endif break; default: - adev->gfx.me.num_me = 1; - 
adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 8; break; } -- GitLab From 1af688126361bc881b134b9d25738e22dd457f30 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 24 Mar 2025 16:26:00 -0400 Subject: [PATCH 228/899] drm/amdgpu: add UAPI to query if user queues are supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an INFO query to check if user queues are supported. v2: switch to a mask of IPs (Marek) v3: move to drm_amdgpu_info_device (Marek) Cc: marek.olsak@amd.com Cc: prike.liang@amd.com Cc: sunil.khatri@amd.com Cc: yogesh.mohanmarimuthu@amd.com Reviewed-by: Marek Olšák Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +++++++ include/uapi/drm/amdgpu_drm.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f8370da080388..b64a21773230c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -999,6 +999,13 @@ out: } } + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) + dev_info->userq_ip_mask |= (1 << AMDGPU_HW_IP_GFX); + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) + dev_info->userq_ip_mask |= (1 << AMDGPU_HW_IP_COMPUTE); + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) + dev_info->userq_ip_mask |= (1 << AMDGPU_HW_IP_DMA); + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? 
-EFAULT : 0; kfree(dev_info); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5dbd9037afe75..ef97c0d78b8a0 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1453,6 +1453,9 @@ struct drm_amdgpu_info_device { __u32 csa_size; /* context save area base virtual alignment for gfx11 */ __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; }; struct drm_amdgpu_info_hw_ip { -- GitLab From 100b6010d7540ed0479ccdb85816db42f4111979 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 24 Mar 2025 16:29:03 -0400 Subject: [PATCH 229/899] drm/amdgpu: bump version for user queue IP support query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the user queue IP support query to the drm_amdgpu_info_device query. Cc: marek.olsak@amd.com Cc: prike.liang@amd.com Cc: sunil.khatri@amd.com Cc: yogesh.mohanmarimuthu@amd.com Reviewed-by: Marek Olšák Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5c55f6e01b414..09a121831b2bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -125,9 +125,10 @@ * - 3.61.0 - Contains fix for RV/PCO compute queues * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size + * - 3.64.0 - Userq IP support query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 63 +#define KMS_DRIVER_MINOR 64 #define KMS_DRIVER_PATCHLEVEL 0 /* -- GitLab From 4ce60dbada9639e385f2f4be2dacf10d8ba22378 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 15:56:24 -0500 Subject: [PATCH 230/899] drm/amdgpu: store userq_managers in a list in adev So we 
can iterate across them when we need to manage all user queues. v2: add uq_mgr to adev list in amdgpu_userq_mgr_init Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 1 + 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3cdb5f8325aa6..f88a04b1c4cef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1237,6 +1237,9 @@ struct amdgpu_device { * in KFD: VRAM or GTT. */ bool apu_prefer_gtt; + + struct list_head userq_mgr_list; + struct mutex userq_mutex; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4da18ed0beeae..6b38b0511330e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4317,6 +4317,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.kfd_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); + mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -4344,6 +4345,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_LIST_HEAD(&adev->userq_mgr_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index beae931152a3c..ecd49cf15b2a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -658,20 +658,34 @@ int 
amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; + mutex_lock(&adev->userq_mutex); + list_add(&userq_mgr->list, &adev->userq_mgr_list); + mutex_unlock(&adev->userq_mutex); + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker); return 0; } void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { - uint32_t queue_id; + struct amdgpu_device *adev = userq_mgr->adev; struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + uint32_t queue_id; cancel_delayed_work(&userq_mgr->resume_work); mutex_lock(&userq_mgr->userq_mutex); idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + if (uqm == userq_mgr) { + list_del(&uqm->list); + break; + } + } + mutex_unlock(&adev->userq_mutex); idr_destroy(&userq_mgr->userq_idr); mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index 0f358f77f2d9b..ec1a4ca6f6321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ -76,6 +76,7 @@ struct amdgpu_userq_mgr { struct mutex userq_mutex; struct amdgpu_device *adev; struct delayed_work resume_work; + struct list_head list; }; struct amdgpu_db_info { -- GitLab From cf97de5b540425d331394da3deafb2a869b394a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 12:49:30 -0400 Subject: [PATCH 231/899] drm/amdgpu/userq: prevent runtime pm when userqs are active Similar to KFD, prevent runtime pm while user queues are active. 
Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 09a121831b2bb..c2f6fe305e6f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2710,6 +2710,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) return 0; } +static int amdgpu_runtime_idle_check_userq(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + ret = -EBUSY; + goto done; + } + } +done: + mutex_unlock(&adev->userq_mutex); + + return ret; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2723,6 +2746,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + ret = amdgpu_runtime_idle_check_userq(dev); if (ret) return ret; @@ -2846,7 +2872,11 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + goto done; + ret = amdgpu_runtime_idle_check_userq(dev); +done: pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; -- GitLab From a96a787d6de7325f7a961bd74a7cad4d078748b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 10:33:53 -0500 Subject: [PATCH 232/899] drm/amdgpu: add parameter to disable kernel queues On chips that support user queues, setting this option will disable kernel queues to be used 
to validate user queues without kernel queues. Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f88a04b1c4cef..804d377037095 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -271,6 +271,7 @@ extern int amdgpu_agp; extern int amdgpu_rebar; extern int amdgpu_wbrf; +extern int amdgpu_disable_kq; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c2f6fe305e6f9..b6184ee242f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -242,6 +242,7 @@ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; int amdgpu_rebar = -1; /* auto */ +int amdgpu_disable_kq = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1110,6 +1111,14 @@ module_param_named(wbrf, amdgpu_wbrf, int, 0444); MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); module_param_named(rebar, amdgpu_rebar, int, 0444); +/** + * DOC: disable_kq (int) + * Disable kernel queues on systems that support user queues. + * (0 = kernel queues enabled, 1 = kernel queues disabled, -1 = auto (default setting)) + */ +MODULE_PARM_DESC(disable_kq, "Disable kernel queues (-1 = auto (default), 0 = enable KQ, 1 = disable KQ)"); +module_param_named(disable_kq, amdgpu_disable_kq, int, 0444); + /* These devices are not supported by amdgpu. 
* They are supported by the mach64, r128, radeon drivers */ -- GitLab From 4310acd4464bc6aed0d211d94b1d343ea3cb62f8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 13:06:19 -0500 Subject: [PATCH 233/899] drm/amdgpu: add ring flag for no user submissions This would be set by IPs which only accept submissions from the kernel, not userspace, such as when kernel queues are disabled. Don't expose the rings to userspace and reject any submissions in the CS IOCTL. v2: fix error code (Alex) Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 30 ++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee02..ea047305eb646 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -349,6 +349,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, ring = amdgpu_job_ring(job); ib = &job->ibs[job->num_ibs++]; + /* submissions to kernel queues are disabled */ + if (ring->no_user_submission) + return -EINVAL; + /* MM engine doesn't support user fences */ if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b64a21773230c..4fb174863a806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -408,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].sched.ready) + if (adev->gfx.gfx_ring[i].sched.ready && + !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -416,7 +417,8 @@ static int 
amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].sched.ready) + if (adev->gfx.compute_ring[i].sched.ready && + !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -424,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.sched.ready) + if (adev->sdma.instance[i].ring.sched.ready && + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -435,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.sched.ready) + if (adev->uvd.inst[i].ring.sched.ready && + !adev->uvd.inst[i].ring.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -444,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].sched.ready) + if (adev->vce.ring[i].sched.ready && + !adev->vce.ring[i].no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -456,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].sched.ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready && + !adev->uvd.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -468,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_dec.sched.ready) + if (adev->vcn.inst[i].ring_dec.sched.ready && + !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings; } ib_start_alignment = 256; @@ -481,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) - if (adev->vcn.inst[i].ring_enc[j].sched.ready) + if (adev->vcn.inst[i].ring_enc[j].sched.ready && + !adev->vcn.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -496,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) - if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready && + !adev->jpeg.inst[i].ring_dec[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -504,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; - if (adev->vpe.ring.sched.ready) + if (adev->vpe.ring.sched.ready && + !adev->vpe.ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 615c3d5c5a8d7..ec4de8df34e71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -297,6 +297,7 @@ struct amdgpu_ring { struct dma_fence *vmid_wait; bool has_compute_vm_bug; bool no_scheduler; + bool no_user_submission; int hw_prio; unsigned num_hw_submission; atomic_t *sched_score; @@ -305,7 +306,6 @@ struct amdgpu_ring { unsigned int entry_index; /* store the cached rptr to restore after reset */ uint64_t cached_rptr; - }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) -- GitLab From f091fa777b24c189523db1ea626434ad6ff29799 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 12:07:48 -0500 Subject: [PATCH 234/899] drm/amdgpu/gfx: add generic handling for disable_kq Add proper checks for disable_kq functionality in gfx helper functions. 
Add special logic for families that require the clear state setup. v2: use ring count as per Felix suggestion v3: fix num_gfx_rings handling in amdgpu_gfx_graphics_queue_acquire() v4: fix error code (Alex) Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f64675b2ab752..0f84e5afd564a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -259,8 +259,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) } /* update the number of active graphics rings */ - adev->gfx.num_gfx_rings = - bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + if (adev->gfx.num_gfx_rings) + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); } static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, @@ -1545,6 +1546,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if (adev->gfx.disable_kq) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 319e6e547c734..caaddab31023f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -485,6 +485,8 @@ struct amdgpu_gfx { struct delayed_work idle_work; bool workload_profile_active; struct mutex workload_profile_mutex; + + bool disable_kq; }; struct amdgpu_gfx_ras_reg_entry { -- GitLab From acdc43f27024cf46172b0e811f649f64a95ad2d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 12:40:30 -0500 Subject: [PATCH 235/899] drm/amdgpu/mes: update hqd masks when disable_kq is set Make all resources available to 
user queues. Reviewed-by: Prike Liang Suggested-by: Sunil Khatri Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index c52071841226f..4127e11924d8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -124,14 +124,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev) * Set GFX pipe 0 queue 1-7 for MES scheduling * mask = 1111 1110b */ - adev->mes.gfx_hqd_mask[i] = 0xFE; + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE; else /* * GFX pipe 0 queue 0 is being used by Kernel queue. * Set GFX pipe 0 queue 1 for MES scheduling * mask = 10b */ - adev->mes.gfx_hqd_mask[i] = 0x2; + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2; } num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; @@ -142,7 +142,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { if (i >= num_pipes) break; - adev->mes.compute_hqd_mask[i] = 0xc; + adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC; } num_pipes = adev->sdma.num_instances; -- GitLab From 1f61fc28b939ba604ea63dd345f3f3ee6c627998 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 12:51:30 -0500 Subject: [PATCH 236/899] drm/amdgpu/mes: make more vmids available when disable_kq=1 If we don't have kernel queues, the vmids can be used by the MES for user queues. 
Acked-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 4127e11924d8d..36f2e87161264 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -106,7 +106,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; - adev->mes.vmid_mask_gfxhub = 0xffffff00; + adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00; num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 8ae4c031162bc..b6ac4c7adc8a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -896,7 +896,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 6e15cff6d5480..d544419e3b44a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -816,7 +816,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 
1 : 8; amdgpu_vm_manager_init(adev); -- GitLab From 1e63ebc0d443c3181ec725826f0abb8b72e9a8c3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 12:16:26 -0500 Subject: [PATCH 237/899] drm/amdgpu/gfx11: add support for disable_kq Plumb in support for disabling kernel queues in GFX11. We have to bring up a GFX queue briefly in order to initialize the clear state. After that we can disable it. v2: use ring counts per Felix' suggestion v3: fix stream fault handler, enable EOP interrupts v4: fix MEC interrupt offset (Sunil) Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 191 ++++++++++++++++++------- 1 file changed, 136 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 29793caa64661..91d29f482c3ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1156,6 +1156,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; @@ -1588,7 +1592,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; + int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ @@ -1745,37 +1749,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); 
- if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -4567,11 +4576,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < 
adev->gfx.num_compute_rings; i++) { @@ -4780,6 +4801,46 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + if (adev->gfx.disable_kq) { + unsigned int irq_type; + int m, p, r; + + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + return 0; +} + static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4789,9 +4850,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { - if (amdgpu_async_gfx_ring) { + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -5117,11 +5180,22 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + if (amdgpu_disable_kq == 1) + adev->gfx.disable_kq = true; + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if 
(adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -5152,6 +5226,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); if (r) return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -6510,27 +6589,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } -- GitLab From 0981e0ef1803bba2e7826ef86f0569e3b63e4df6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 12:37:51 -0500 Subject: 
[PATCH 238/899] drm/amdgpu/gfx12: add support for disable_kq Plumb in support for disabling kernel queues. v2: use ring counts per Felix' suggestion v3: fix stream fault handler, enable EOP interrupts v4: fix MEC interrupt offset (Sunil) v5: clean up after removing extra sched.ready settings Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 167 +++++++++++++++++-------- 1 file changed, 114 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index bbc6349e5270e..3d87e445270e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1442,11 +1442,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) break; } - /* recalculate compute rings to use based on hardware configuration */ - num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * - adev->gfx.mec.num_queue_per_pipe) / 2; - adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, - num_compute_rings); + if (adev->gfx.num_compute_rings) { + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + } /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, @@ -1492,37 +1494,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v12_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; 
j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v12_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, - 0, i, k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + 0, i, k, j)) + continue; - r = gfx_v12_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v12_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -3671,6 +3677,46 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + if (adev->gfx.disable_kq) { + unsigned int irq_type; + int m, p, r; + + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = 
amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + return 0; +} + static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3681,6 +3727,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v12_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3769,11 +3816,19 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + if (amdgpu_disable_kq == 1) + adev->gfx.disable_kq = true; + adev->gfx.funcs = &gfx_v12_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + adev->gfx.num_gfx_rings = 0; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v12_0_set_kiq_pm4_funcs(adev); gfx_v12_0_set_ring_funcs(adev); @@ -3804,6 +3859,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block) if (r) return r; + r = gfx_v12_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -4935,27 +4994,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id 
&& ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } -- GitLab From 1d65006fc14e9ad13f53f604f5969090dedf7dbd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 13:09:55 -0500 Subject: [PATCH 239/899] drm/amdgpu/sdma: add flag for tracking disable_kq For SDMA, we still need kernel queues for paging so they need to be initialized, but we do not want to accept submissions from userspace when disable_kq is set. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 47d56fd0589fc..0b1fbcf0b4d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -131,6 +131,7 @@ struct amdgpu_sdma { uint32_t *ip_dump; uint32_t supported_reset; struct list_head reset_callback_list; + bool no_user_submission; }; /* -- GitLab From fcf5eb979a5803ff5b5add820f70d98953b19ecd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 13:22:49 -0500 Subject: [PATCH 240/899] drm/amdgpu/sdma6: add support for disable_kq When the parameter is set, disable user submissions to kernel queues. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 00dd7bfff01a2..2249a1ef057bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1269,6 +1269,9 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + if (amdgpu_disable_kq == 1) + adev->sdma.no_user_submission = true; + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) return r; @@ -1303,6 +1306,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); -- GitLab From 72801504fd09e86e389b5692d4d9d563a08914d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2025 13:23:55 -0500 Subject: [PATCH 241/899] drm/amdgpu/sdma7: add support for disable_kq When the parameter is set, disable user submissions to kernel queues. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 99744728f5cb3..4f66b81110bbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1254,6 +1254,9 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + if (amdgpu_disable_kq == 1) + adev->sdma.no_user_submission = true; + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { DRM_ERROR("Failed to init sdma firmware!\n"); @@ -1289,6 +1292,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); -- GitLab From 8b2ae7d492675e8af8902f103364bef59382b935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 28 Mar 2025 18:58:17 +0100 Subject: [PATCH 242/899] drm/amdgpu: use a dummy owner for sysfs triggered cleaner shaders v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise triggering sysfs multiple times without other submissions in between only runs the shader once. 
v2: add some comment v3: re-add missing cast v4: squash in semicolon fix Signed-off-by: Christian König Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 0f84e5afd564a..663830c6c73b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1440,9 +1440,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; + void *owner; int i, r; /* Initialize the scheduler entity */ @@ -1453,9 +1455,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; } - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, - 64, 0, - &job); + /* + * Use some unique dummy value as the owner to make sure we execute + * the cleaner shader on each submission. The value just need to change + * for each submission and is otherwise meaningless. + */ + owner = (void *)(unsigned long)atomic_inc_return(&counter); + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, + 64, 0, &job); if (r) goto err; -- GitLab From 2aabd44aa8a3c08da3d43264c168370f6da5e81d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 8 Apr 2025 13:09:57 -0500 Subject: [PATCH 243/899] drm/amd: Forbid suspending into non-default suspend states On systems that default to 'deep' some userspace software likes to try to suspend in 'deep' first. If there is a failure for any reason (such as -ENOMEM) the failure is ignored and then it will try to use 's2idle' as a fallback. This fails, but more importantly it leads to graphical problems. 
Forbid this behavior and only allow suspending in the last state supported by the system. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4093 Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250408180957.4027643-1-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 804d377037095..bb5df7831308f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1150,6 +1150,7 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + suspend_state_t last_suspend_state; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b6184ee242f4f..f5e83acb61697 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2571,8 +2571,20 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; - if (!adev->in_s0ix && !adev->in_s3) + if (!adev->in_s0ix && !adev->in_s3) { + /* don't allow going deep first time followed by s2idle the next time */ + if (adev->last_suspend_state != PM_SUSPEND_ON && + adev->last_suspend_state != pm_suspend_target_state) { + drm_err_once(drm_dev, "Unsupported suspend state %d\n", + pm_suspend_target_state); + return -EINVAL; + } return 0; + } + + /* cache the state last used for suspend */ + adev->last_suspend_state = pm_suspend_target_state; + return amdgpu_device_suspend(drm_dev, true); } -- GitLab From 8ef4e996745b56ad6018e4120acf9ab530b72c2f Mon Sep 17 00:00:00 2001 From: Masha Grinman Date: Thu, 3 Apr 2025 14:08:17 -0500 Subject: [PATCH 244/899] drm/amdgpu: Move read of snoop register 
from guest to host The guest is reading/writing to the snoop register, which is a security violation. We moved the code to the host driver and also added a validation on the guest side to check if it's a guest. Signed-off-by: Masha Grinman Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 4a43c9ab95a2b..76167fadb292b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -239,6 +239,9 @@ static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { for (j = 0; j < 5; j++) { /* DAGB instances */ -- GitLab From f0ec5926da24157bada1ebe9789c8b2527b85182 Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Mon, 31 Mar 2025 20:55:09 -0400 Subject: [PATCH 245/899] amd/amdgpu: Implement VCN queue reset for vcn 4.0.3 Add a function for VCN queue reset so the driver can do a fine-grained reset instead of a whole GPU reset.
Reviewed-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index a1355a37cef79..139c83bd165e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1570,6 +1570,37 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (amdgpu_sriov_vf(ring->adev)) + return -EOPNOTSUPP; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + vcn_v4_0_3_hw_init_inst(vinst); + vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); + r = amdgpu_ring_test_helper(ring); + + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1598,6 +1629,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_3_ring_reset, }; /** -- GitLab From 0ec7535f5ba1cb4550e45ad7d585ca746fdcae6a Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 28 Mar 2025 18:33:09 +0800 
Subject: [PATCH 246/899] drm/amdgpu: remove the duplicated mes queue active state setting The MES queue deactivation and active status are already set in mes_userq_unmap|map(), so the caller needn't set the queue_active bit again. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index b469b800119ff..b9705cbec74d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -358,10 +358,8 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - if (queue->queue_active) { + if (queue->queue_active) mes_userq_unmap(uq_mgr, queue); - queue->queue_active = false; - } return 0; } @@ -379,8 +377,6 @@ static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, DRM_ERROR("Failed to resume queue\n"); return ret; } - - queue->queue_active = true; return 0; } -- GitLab From c770ef19673fb1defcbde2ee2b91c3c89bfcf164 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 1 Apr 2025 16:04:41 +0800 Subject: [PATCH 247/899] drm/amd/amdgpu: disable ASPM in some situations Disable ASPM with some ASICs on some specific platforms. This is required by the PCIe controller owner.
Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6b38b0511330e..84ddd02579bb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -85,6 +85,7 @@ #if IS_ENABLED(CONFIG_X86) #include +#include #endif MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); @@ -1873,6 +1874,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device return true; } +static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + return false; + + if (c->x86 == 6 && + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + switch (c->x86_model) { + case VFM_MODEL(INTEL_ALDERLAKE): + case VFM_MODEL(INTEL_ALDERLAKE_L): + case VFM_MODEL(INTEL_RAPTORLAKE): + case VFM_MODEL(INTEL_RAPTORLAKE_P): + case VFM_MODEL(INTEL_RAPTORLAKE_S): + return true; + default: + return false; + } + } else { + return false; + } +#else + return false; +#endif +} + /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * @@ -1897,6 +1927,8 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; + if (amdgpu_device_aspm_support_quirk(adev)) + return false; return pcie_aspm_enabled(adev->pdev); } -- GitLab From 39938a8ed979e398faa3791a47e282c82bcc6f04 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 8 Apr 2025 16:18:28 +0800 Subject: [PATCH 248/899] drm/amdgpu: fix warning of drm_mm_clean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel doorbell BOs 
need to be freed before ttm_fini. Fixes: 54c30d2a8def ("drm/amdgpu: create kernel doorbell pages") Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 84ddd02579bb1..66ed6a7924efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3550,6 +3550,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } if (adev->ip_blocks[i].version->funcs->sw_fini) { r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); @@ -4903,7 +4904,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } -- GitLab From 732c6cefc1ecfc8de5d7a2029480798655d979d8 Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Wed, 9 Apr 2025 19:53:11 +0800 Subject: [PATCH 249/899] drm/amdgpu: Replace tmp_adev with hive in amdgpu_pci_slot_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking hive is more readable.
The following smatch warning: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:6820 amdgpu_pci_slot_reset() warn: iterator used outside loop: 'tmp_adev' Reported-by: Dan Carpenter Signed-off-by: Ce Sun Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66ed6a7924efd..8e150e9393c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6794,8 +6794,8 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_reset_context reset_context; - struct amdgpu_device *tmp_adev = NULL; - struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev; + struct amdgpu_hive_info *hive; struct list_head device_list; int r = 0, i; u32 memsize; @@ -6856,7 +6856,7 @@ out: dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); - if (tmp_adev) { + if (hive) { list_for_each_entry(tmp_adev, &device_list, reset_list) amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_device_unlock_reset_domain(adev->reset_domain); -- GitLab From 9a218d6f478edca73207f5b72efc9bbf035ea1cf Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 21 Feb 2025 20:34:32 +0800 Subject: [PATCH 250/899] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute queue until the CPFW fully supports it. 
Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 89 +++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 3d87e445270e4..2474006b1a340 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5316,6 +5316,89 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r = 0; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) 
<< 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + /* Doesn't find the F32 MEC instruction pointer register, and suppose + * the driver won't run into the F32 mode. + */ + } + + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Need the ring test to verify the pipe reset result.*/ + return 0; +} + static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5326,8 +5409,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v12_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kcq_init_queue(ring, true); -- GitLab From 0f4dfe86fe922c37bcec99dce80a15b4d5d4726d Mon Sep 17 00:00:00 2001 From: David Rosca Date: Mon, 7 Apr 2025 13:12:11 +0200 Subject: [PATCH 251/899] drm/amdgpu: Add back JPEG to video caps for carrizo and newer JPEG is not supported on Vega only. 
Fixes: 0a6e7b06bdbe ("drm/amdgpu: Remove JPEG from vega and carrizo video caps") Signed-off-by: David Rosca Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 86d8bc10d90ab..9b3510e531127 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, + { + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, + .max_width = 4096, + .max_height = 4096, + .max_pixels_per_frame = 4096 * 4096, + .max_level = 0, + }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = -- GitLab From a7bb01337fcededd05ef9b6e08e64d2ac7d65724 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Apr 2025 10:45:52 -0400 Subject: [PATCH 252/899] drm/amdgpu/mes11: use the device value for enforce isolation Use the local setting rather than the global parameter. 
Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 06b51867c9aac..344d32268c3cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -724,7 +724,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (enforce_isolation) + if (adev->enforce_isolation[0]) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, -- GitLab From b86fd212f37631551f9620e28062dd2b0c53ccdd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Apr 2025 10:47:24 -0400 Subject: [PATCH 253/899] drm/amdgpu/mes12: use the device value for enforce isolation Use the local setting rather than the global parameter. Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 8892858cfd9ae..be43e19b7b7fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -762,7 +762,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (enforce_isolation) + if (adev->enforce_isolation[0]) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, -- GitLab From 2e0454b730648e9349ca54eb4a8142d77e8e7008 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Apr 2025 10:39:06 -0400 Subject: [PATCH 254/899] drm/amdgpu: adjust enforce_isolation handling Switch from a bool to an enum and allow more options for enforce isolation. 
There are now 3 modes of operation: - Disabled (0) - Enabled (serialization and cleaner shader) (1) - Enabled in legacy mode (no serialization or cleaner shader) (2) This provides better flexibility for more use cases. Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 22 ++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 2 +- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 11 +++--- 12 files changed, 93 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bb5df7831308f..b156e31ac86ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -230,7 +230,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; -extern bool enforce_isolation; +extern int amdgpu_enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -873,6 +873,13 @@ struct amdgpu_init_level { struct amdgpu_reset_domain; struct amdgpu_fru_info; +enum amdgpu_enforce_isolation_mode { + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, +}; + + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. 
*/ @@ -1225,7 +1232,7 @@ struct amdgpu_device { /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; - bool enforce_isolation[MAX_XCP]; + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; struct amdgpu_isolation { void *owner; struct dma_fence *spearhead; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ea047305eb646..0941b3495b2ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,7 +296,21 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { + case AMDGPU_ENFORCE_ISOLATION_DISABLE: + default: + p->jobs[i]->enforce_isolation = false; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = true; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + } } p->gang_leader = p->jobs[p->gang_leader_idx]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8e150e9393c72..475bcd2a8a315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2145,8 +2145,26 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - for (i = 0; i < MAX_XCP; i++) - adev->enforce_isolation[i] = !!enforce_isolation; + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + 
adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + } + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f5e83acb61697..a117cd95b9dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -179,7 +179,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer @@ -1038,11 +1038,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode) */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. 
(-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode)"); /** * DOC: modeset (int) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 663830c6c73b3..2c933d436e564 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1468,6 +1468,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; job->enforce_isolation = true; + /* always run the cleaner shader */ + job->run_cleaner_shader = true; ib = &job->ibs[0]; for (i = 0; i <= ring->funcs->align_mask; ++i) @@ -1599,7 +1601,7 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' * feature for each GPU partition. Reading from the 'enforce_isolation' * sysfs file returns the isolation settings for all partitions, where '0' - * indicates disabled and '1' indicates enabled. + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode. * * Return: The number of bytes read from the sysfs file. */ @@ -1634,9 +1636,10 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, * @count: The size of the input data * * This function allows control over the 'enforce_isolation' feature, which - * serializes access to the graphics engine. Writing '1' or '0' to the - * 'enforce_isolation' sysfs file enables or disables process isolation for - * each partition. The input should specify the setting for all partitions. + * serializes access to the graphics engine. Writing '1', '2', or '0' to the + * 'enforce_isolation' sysfs file enables (full or legacy) or disables process + * isolation for each partition. The input should specify the setting for all + * partitions. * * Return: The number of bytes written to the sysfs file. 
*/ @@ -1673,13 +1676,29 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return -EINVAL; for (i = 0; i < num_partitions; i++) { - if (partition_values[i] != 0 && partition_values[i] != 1) + if (partition_values[i] != 0 && + partition_values[i] != 1 && + partition_values[i] != 2) return -EINVAL; } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) - adev->enforce_isolation[i] = partition_values[i]; + for (i = 0; i < num_partitions; i++) { + switch (partition_values[i]) { + case 0: + default: + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + } + } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -2034,7 +2053,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, bool wait = false; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { /* set the initial values if nothing is set */ if (!adev->gfx.enforce_isolation_jiffies[idx]) { adev->gfx.enforce_isolation_jiffies[idx] = jiffies; @@ -2101,7 +2120,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2138,7 +2157,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) return; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4c4e087230ac5..359c19de9a5b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -588,7 +588,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } /* alloc a default reserved vmid to enforce isolation */ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967fff..f2c049129661f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -78,6 +78,7 @@ struct amdgpu_job { /* enforce isolation */ bool enforce_isolation; + bool run_cleaner_shader; uint32_t num_ibs; struct amdgpu_ib ibs[]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 36f2e87161264..38ea64d87a0ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -768,7 +768,7 @@ int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { mutex_lock(&adev->enforce_isolation_mutex); for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) r |= amdgpu_mes_set_enforce_isolation(adev, i, true); else r |= amdgpu_mes_set_enforce_isolation(adev, i, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index aa65d64fb15c7..3911c78f82827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -787,7 +787,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 344d32268c3cd..f7aa45775eadb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -724,7 +724,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (adev->enforce_isolation[0]) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index be43e19b7b7fa..b0e042a4cea19 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -762,7 +762,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (adev->enforce_isolation[0]) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return 
mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 2893fd5e5d003..fa28c57692b86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); - if (adev->enforce_isolation[kfd->node_id]) + if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -102,7 +102,8 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); - if (adev->enforce_isolation[knode->node_id]) + if (adev->enforce_isolation[knode->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? - 1 : min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); + concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) ? 
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; -- GitLab From 309d11b4bbb402abdcf128920f353f4332f1943b Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 9 Apr 2025 12:03:00 -0400 Subject: [PATCH 255/899] drm/amd/display: Add htmldocs description for fused_io interface [Why] htmldocs build warning: "Function parameter or struct member 'fused_io' not described in 'amdgpu_display_manager'". [How] Add missing description. Fixes: ce801e5d6c1b ("drm/amd/display: HDCP Locality check using DMUB Fused IO") Reported-by: Stephen Rothwell Signed-off-by: Roman Li Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index c8201a7a1ad6c..4b615071e93bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -639,6 +639,11 @@ struct amdgpu_display_manager { */ struct amdgpu_i2c_adapter *oem_i2c; + /** + * @fused_io: + * + * dmub fused io interface + */ struct fused_io_sync { struct completion replied; char reply_data[0x40]; // Cannot include dmub_cmd here -- GitLab From a61c16258a4720065972cf04fcfee1caa6ea5fc0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 10 Apr 2025 12:23:06 -0500 Subject: [PATCH 256/899] Documentation/amdgpu: Add Ryzen AI 350 series processors These have been announced so add them to the table. 
Link: https://www.amd.com/en/products/processors/laptop/ryzen/ai-300-series/amd-ryzen-ai-7-350.html Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/apu-asic-info-table.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv index 5dd4b8762d19f..094e76a65dc0e 100644 --- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv +++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv @@ -13,3 +13,4 @@ Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8 Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0 +Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4 -- GitLab From 20c50a9a793300a1fc82f3ddd0e3c68f8213fbef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Apr 2025 21:27:15 -0400 Subject: [PATCH 257/899] drm/amd/display/dml2: use vzalloc rather than kzalloc The structures are large and they do not require contiguous memory so use vzalloc. Fixes: 70839da63605 ("drm/amd/display: Add new DCN401 sources") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4126 Cc: Aurabindo Pillai Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 11 ++++++----- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 94e99e540691c..5d16f36ec95c8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
+#include #include "dml2_internal_types.h" #include "dml_top.h" @@ -13,11 +14,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { - *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml_ctx = vzalloc(sizeof(struct dml2_context)); if (!(*dml_ctx)) return false; - (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); if (!((*dml_ctx)->v21.dml_init.dml2_instance)) return false; @@ -27,7 +28,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); if (!((*dml_ctx)->v21.mode_programming.programming)) return false; @@ -115,8 +116,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s void dml21_destroy(struct dml2_context *dml2) { - kfree(dml2->v21.dml_init.dml2_instance); - kfree(dml2->v21.mode_programming.programming); + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); } static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 04316d719426a..525b7d04bf84c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -24,6 +24,8 @@ * */ +#include + #include "display_mode_core.h" #include "dml2_internal_types.h" #include "dml2_utils.h" @@ -750,7 +752,7 @@ bool dml2_validate(const struct dc 
*in_dc, struct dc_state *context, struct dml2 static inline struct dml2_context *dml2_allocate_memory(void) { - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); } static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -824,7 +826,7 @@ void dml2_destroy(struct dml2_context *dml2) if (dml2->architecture == dml2_architecture_21) dml21_destroy(dml2); - kfree(dml2); + vfree(dml2); } void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, -- GitLab From e62a8bc5d68718c0d1e0366e2e636dd41e7ee5e4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 10 Apr 2025 19:37:06 +0530 Subject: [PATCH 258/899] drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5.2/11.5.3 GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable the cleaner shader for additional GFX11.5.2/11.5.3 series GPUs to ensure data isolation among GPU tasks. The cleaner shader is tasked with clearing the Local Data Store (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs), which helps avoid data leakage and guarantees the accuracy of computational results. This update extends cleaner shader support to GFX11.5.2/11.5.3 GPUs, previously available for GFX11.0.3. It enhances security by clearing GPU memory between processes and maintains a consistent GPU state across KGD and KFD workloads. 
Cc: Mario Sopena-Novales Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 91d29f482c3ca..06ad10d06ca1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1688,6 +1688,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; -- GitLab From e21e1e8bb847b87e2e099ffdf37ed382ccba4b44 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Tue, 8 Apr 2025 14:23:45 +0800 Subject: [PATCH 259/899] drm/amdgpu: Enable per-queue reset for SDMA v4.4.2 on IP v9.5.0 Add support for per-queue reset on SDMA v4.4.2 when running with: 1. 
MEC firmware version 17 or later 2. DPM indicates SDMA reset is supported v2: Fixed supported firmware versions (Lijo) Signed-off-by: Jesse.Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 688a720bbbbd8..673ecd208c6d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2373,7 +2373,9 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(9, 5, 0): - /*TODO: enable the queue reset flag until fw supported */ + if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; default: break; } -- GitLab From d6b22b1dffdae52ba5739326ea7b271ba5354db1 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 11 Apr 2025 13:12:06 +0800 Subject: [PATCH 260/899] drm/amdgpu: Set RAS EEPROM table version to v3 for umc v12_5 Set RAS EEPROM table version to v3 for umc v12_5. 
Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index c985d58fdd7dd..2c58e09e56f95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -418,6 +418,7 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control hdr->version = RAS_TABLE_VER_V2_1; return; case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 5, 0): hdr->version = RAS_TABLE_VER_V3; return; default: -- GitLab From 18a878fd8aef0ec21648a3782f55a79790cd4073 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Apr 2025 16:45:46 +0530 Subject: [PATCH 261/899] drm/amdgpu: Use generic hdp flush function Except HDP v5.2 all use a common logic for HDP flush. Use a generic function. HDP v5.2 forces NO_KIQ logic, revisit it later. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 ++ drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 13 +------------ drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 13 +------------ drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 13 +------------ drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 13 +------------ 6 files changed, 27 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index b6cf801939aa5..7fd8f09c28e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_ras.h" +#include int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) { @@ -46,3 +47,23 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) /* hdp ras follows amdgpu_ras_block_late_init_default for late init */ return 0; } + 
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32((adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + RREG32((adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2); + } else { + amdgpu_ring_emit_wreg(ring, + (adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + } +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 7b8a6152dc8d9..4cfd932b7e91e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -44,4 +44,6 @@ struct amdgpu_hdp { }; int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev); +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38e..e6c0d86d34865 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -36,17 +36,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -180,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = { }; const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { - .flush_hdp = hdp_v4_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = 
hdp_v4_0_invalidate_hdp, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c0797480..8bc001dc9f631 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include -static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -217,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_hdp_funcs hdp_v5_0_funcs = { - .flush_hdp = hdp_v5_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v5_0_invalidate_hdp, .update_clock_gating = hdp_v5_0_update_clock_gating, .get_clock_gating_state = hdp_v5_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29b..ec20daf4272c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -30,17 +30,6 @@ #define regHDP_CLK_CNTL_V6_1 0xd5 #define regHDP_CLK_CNTL_V6_1_BASE_IDX 0 -static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + 
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -149,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v6_0_funcs = { - .flush_hdp = hdp_v6_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v6_0_update_clock_gating, .get_clock_gating_state = hdp_v6_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd117..ed1debc035073 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_7_0_0_sh_mask.h" #include -static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -137,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { - .flush_hdp = hdp_v7_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v7_0_update_clock_gating, .get_clock_gating_state = hdp_v7_0_get_clockgating_state, }; -- GitLab From 5045c6c69872b9dd145e9ecbc9731d81a5954be1 Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Thu, 10 Apr 2025 22:12:24 -0400 Subject: [PATCH 262/899] drm/amdgpu: Direct ret in ras_reset_err_cnt on VF With adding sriov_vf check, we directly return EOPNOTSUPP in ras_reset_error_count as we should not do anything on VF to reset RAS error count. 
This also fixes the issue that loading guest driver causes register violations. Reviewed-by: Ahmad Rehman Signed-off-by: Ellen Pan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5bb7673fd28ee..17f0911ee7e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1498,6 +1498,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; + if (amdgpu_sriov_vf(adev)) + return -EOPNOTSUPP; + /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) && ((smu_funcs && smu_funcs->set_debug_mode) || -- GitLab From 1d9bff4cf8c53d33ee2ff1b11574e5da739ce61c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Apr 2025 17:40:26 +0530 Subject: [PATCH 263/899] drm/amdgpu: Use the right function for hdp flush There are a few prechecks made before HDP flush like a flush is not required on APU bare metal. Using hdp callback directly bypasses those checks. Use amdgpu_device_flush_hdp which takes care of prechecks. 
Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f5dcb72a6bf5b..00eb4cfecf8f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6172,7 +6172,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6250,7 +6250,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6327,7 +6327,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6702,7 +6702,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = 
RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 06ad10d06ca1c..ac90f823e596c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -2509,7 +2509,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2553,7 +2553,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2598,7 +2598,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3234,7 +3234,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3452,7 +3452,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -4648,7 
+4648,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 2474006b1a340..f347921fa9097 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2389,7 +2389,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2533,7 +2533,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3503,7 +3503,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index b6ac4c7adc8a6..7648e977b44bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -965,7 +965,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 5c91d4445418c..7f5ca170f141a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -895,7 +895,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index d544419e3b44a..b645d3e6a6c81 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -297,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -877,7 +877,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8d3560314e5b2..53050176c244d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2425,7 +2425,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bb5dfc410a667..215543575f477 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -533,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 17f1ccd8bd534..f5f616ab20e70 100644 
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -610,7 +610,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 7c49c3f3c3881..256288c6cd78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -498,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { -- GitLab From 4172b556fd5bdfdf9b2d1e42da39df6ce99ee989 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 7 Apr 2025 15:32:33 -0400 Subject: [PATCH 264/899] drm/amdkfd: add smi events for process start and end rocm-smi will be able to show the events for KFD process start/end, it is the implementation of this feature. 
Signed-off-by: Eric Huang Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 1 + include/uapi/linux/kfd_ioctl.h | 5 +++++ 4 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7c0c24732481e..41d7dc8c28507 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1054,6 +1054,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_process(pdd, false); + pr_debug("Releasing pdd (topology id %d, for pid %d)\n", pdd->dev->id, p->lead_thread->pid); kfd_process_device_destroy_cwsr_dgpu(pdd); @@ -1715,6 +1717,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, pdd->pasid = avm->pasid; pdd->drm_file = drm_file; + kfd_smi_event_process(pdd, true); + return 0; err_get_pasid: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 9b8169761ec5b..727a4ce29fe63 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -345,6 +345,27 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, pid, address, last - address + 1, node->id, trigger)); } +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) +{ + struct amdgpu_task_info *task_info; + struct amdgpu_vm *avm; + + if (!pdd->drm_priv) + return; + + avm = drm_priv_to_vm(pdd->drm_priv); + task_info = amdgpu_vm_get_task_info_vm(avm); + + if (task_info) { + kfd_smi_event_add(0, pdd->dev, + start ? 
KFD_SMI_EVENT_PROCESS_START : + KFD_SMI_EVENT_PROCESS_END, + KFD_EVENT_FMT_PROCESS(task_info->pid, + task_info->task_name)); + amdgpu_vm_put_task_info(task_info); + } +} + int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) { struct kfd_smi_client *client; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index 503bff13d8153..bb4d72b57387c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -53,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, unsigned long address, unsigned long last, uint32_t trigger); +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start); #endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e59344c5673e..04c7d283dc7d7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -536,6 +536,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_PROCESS_START = 12, + KFD_SMI_EVENT_PROCESS_END = 13, /* * max event number, as a flag bit to get events from all processes, @@ -651,6 +653,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_PROCESS(pid, task_name)\ + "%x %s\n", (pid), (task_name) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- GitLab From cf93f10101f958a95023ed1b63d631c8112b17ca Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 11 Apr 2025 16:22:27 +0800 Subject: [PATCH 265/899] drm/amd/amdgpu: Fix out of bounds warning in amdgpu_hw_ip_info Fix an array index out of bounds warning in the DMA IP case of amdgpu_hw_ip_info() where it was incorrectly checking 
adev->gfx.gfx_ring[i].no_user_submission instead of adev->sdma.instance[i].ring.no_user_submission. The mismatch caused UBSAN to report an array bounds violation since it was accessing the GFX ring array with SDMA instance indices. Fixes: 4310acd4464b ("drm/amdgpu: add ring flag for no user submissions") Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4fb174863a806..3d319687c1c9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -427,7 +427,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) if (adev->sdma.instance[i].ring.sched.ready && - !adev->gfx.gfx_ring[i].no_user_submission) + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; -- GitLab From 8896abcfdd898a79d5795b0689bc944518860d21 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 11 Apr 2025 21:15:41 +0530 Subject: [PATCH 266/899] drm/amdgpu: Add PACKET3_RUN_CLEANER_SHADER_9_0 for Cleaner Shader execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the PACKET3_RUN_CLEANER_SHADER_9_0 definition, which is a command packet utilized to instruct the GPU to execute the cleaner shader for the GFX9.0 graphics architecture. The cleaner shader is a piece of GPU code that is responsible for clearing or initializing essential GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Properly clearing these resources is vital for ensuring data isolation and security between different workloads executed on the GPU. 
When the GPU receives this packet, it fetches and runs the cleaner shader instructions from the specified location in the packet, thereby preventing data leaks and ensuring that previous job states do not interfere with subsequent workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15d.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index a5000c171c02c..cf93fa477674a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -552,6 +552,11 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7 +/* 1. header + * 2. RESERVED [31:0] + */ + #define PACKET3_RUN_CLEANER_SHADER 0xD2 /* 1. header * 2. RESERVED [31:0] -- GitLab From 083a0c8d172aa22ea20e75bc1d48fd205130dffa Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 11 Apr 2025 21:32:08 +0530 Subject: [PATCH 267/899] drm/amdgpu: Enhance Cleaner Shader Handling in GFX v9.0 Architecture v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit modifies the gfx_v9_0_ring_emit_cleaner_shader function to use a switch statement for cleaner shader emission based on the specific GFX IP version. The function now distinguishes between different IP versions, using PACKET3_RUN_CLEANER_SHADER_9_0 for the versions 9.0.1, 9.1.0, 9.2.1, 9.2.2, 9.3.0, and 9.4.0, while retaining PACKET3_RUN_CLEANER_SHADER for version 9.4.2. v2: Simplify logic (Alex). 
Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e62c77b3934a4..a63ba6642b188 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7387,8 +7387,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + /* Emit the cleaner shader */ - amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + else + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } -- GitLab From 6ca37b86f6c9784f514231c871effb1a4a5c3db9 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 11 Apr 2025 12:54:11 +0300 Subject: [PATCH 268/899] drm/{i915,xe}: Move intel_pch under display The only usage of the "PCH" infra is to detect which South Display Engine we should be using. Move it under display so we can convert all its callers towards intel_display struct later. No functional or code change. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Jani Nikula Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/041e3dee494aa15c22172360f2bdd9b15e4acb00.1744364975.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/{soc => display}/intel_pch.c | 2 +- drivers/gpu/drm/i915/{soc => display}/intel_pch.h | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h | 6 ------ drivers/gpu/drm/xe/xe_device_types.h | 2 +- 7 files changed, 6 insertions(+), 13 deletions(-) rename drivers/gpu/drm/i915/{soc => display}/intel_pch.c (99%) rename drivers/gpu/drm/i915/{soc => display}/intel_pch.h (98%) delete mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c8fc271b33b77..13d4a16f7d33a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -52,7 +52,6 @@ i915-y += \ i915-y += \ soc/intel_dram.o \ soc/intel_gmch.o \ - soc/intel_pch.o \ soc/intel_rom.o # core library code @@ -282,6 +281,7 @@ i915-y += \ display/intel_modeset_setup.o \ display/intel_modeset_verify.o \ display/intel_overlay.o \ + display/intel_pch.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ display/intel_plane_initial.o \ diff --git a/drivers/gpu/drm/i915/soc/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c similarity index 99% rename from drivers/gpu/drm/i915/soc/intel_pch.c rename to drivers/gpu/drm/i915/display/intel_pch.c index 82dc7fbd1a3e9..37766e40fd1cd 100644 --- a/drivers/gpu/drm/i915/soc/intel_pch.c +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT /* - * Copyright 2019 Intel Corporation. + * Copyright 2025 Intel Corporation. 
*/ #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/soc/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h similarity index 98% rename from drivers/gpu/drm/i915/soc/intel_pch.h rename to drivers/gpu/drm/i915/display/intel_pch.h index 635aea7a5539d..b9fa2b2f07bce 100644 --- a/drivers/gpu/drm/i915/soc/intel_pch.h +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright 2019 Intel Corporation. + * Copyright 2025 Intel Corporation. */ #ifndef __INTEL_PCH__ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 236c48d282e47..65e795a4ecbab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -38,6 +38,7 @@ #include "display/intel_display_limits.h" #include "display/intel_display_core.h" +#include "display/intel_pch.h" #include "gem/i915_gem_context_types.h" #include "gem/i915_gem_shrinker.h" @@ -49,8 +50,6 @@ #include "gt/intel_workarounds.h" #include "gt/uc/intel_uc.h" -#include "soc/intel_pch.h" - #include "i915_drm_client.h" #include "i915_gem.h" #include "i915_gpu_error.h" diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 4a99568605bd8..e4d16e7a1b12d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -193,7 +193,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ # SOC code shared with i915 xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-soc/intel_dram.o \ - i915-soc/intel_pch.o \ i915-soc/intel_rom.o # Display code shared with i915 @@ -268,6 +267,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ i915-display/intel_pmdemand.o \ + i915-display/intel_pch.o \ i915-display/intel_pps.o \ i915-display/intel_psr.o \ i915-display/intel_qp_tables.o \ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h deleted file mode 100644 index 9c46556d33a40..0000000000000 --- 
a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 50a1579bb18e0..23bcb0ee7afd5 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -30,7 +30,7 @@ #endif #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "soc/intel_pch.h" +#include "intel_pch.h" #include "intel_display_core.h" #include "intel_display_device.h" #endif -- GitLab From ad2837640b658446325c111599b304732c41d3bb Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 11 Apr 2025 12:54:12 +0300 Subject: [PATCH 269/899] drm/i915/display: Convert intel_pch towards intel_display Now that intel_pch lives under display, let's begin its conversion towards struct intel_display. Move the pch_type to inside intel_display and convert the callers. While doing it, sort intel_display_core.h include list alphabetically. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/8ffe86eb2a02153e3f866a81fb6dc8a3327a0f25.1744364975.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_display_core.h | 6 +- drivers/gpu/drm/i915/display/intel_pch.c | 242 +++++++++--------- drivers/gpu/drm/i915/display/intel_pch.h | 40 +-- drivers/gpu/drm/i915/i915_driver.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/xe/display/xe_display.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 1 - 7 files changed, 155 insertions(+), 141 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index eb6d6f2d0f757..48e47f0fd8b76 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -21,12 +21,13 @@ #include "intel_display_limits.h" #include "intel_display_params.h" #include "intel_display_power.h" +#include "intel_dmc_wl.h" #include "intel_dpll_mgr.h" #include "intel_fbc.h" #include "intel_global_state.h" #include "intel_gmbus.h" #include "intel_opregion.h" -#include "intel_dmc_wl.h" +#include "intel_pch.h" #include "intel_wm_types.h" struct task_struct; @@ -289,6 +290,9 @@ struct intel_display { /* Platform (and subplatform, if any) identification */ struct intel_display_platforms platform; + /* Intel PCH: where the south display engine lives */ + enum intel_pch pch_type; + /* Display functions */ struct { /* Top level crtc-ish functions */ diff --git a/drivers/gpu/drm/i915/display/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c index 37766e40fd1cd..e63480dd01fe9 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.c +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -39,139 +39,150 @@ /* Map PCH device id to PCH type, or PCH_NONE if unknown. 
*/ static enum intel_pch -intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) +intel_pch_type(const struct intel_display *display, unsigned short id) { switch (id) { case INTEL_PCH_IBX_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n"); - drm_WARN_ON(&dev_priv->drm, GRAPHICS_VER(dev_priv) != 5); + drm_dbg_kms(display->drm, "Found Ibex Peak PCH\n"); + drm_WARN_ON(display->drm, DISPLAY_VER(display) != 5); return PCH_IBX; case INTEL_PCH_CPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - GRAPHICS_VER(dev_priv) != 6 && !IS_IVYBRIDGE(dev_priv)); + drm_dbg_kms(display->drm, "Found CougarPoint PCH\n"); + drm_WARN_ON(display->drm, + DISPLAY_VER(display) != 6 && + !display->platform.ivybridge); return PCH_CPT; case INTEL_PCH_PPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - GRAPHICS_VER(dev_priv) != 6 && !IS_IVYBRIDGE(dev_priv)); + drm_dbg_kms(display->drm, "Found PantherPoint PCH\n"); + drm_WARN_ON(display->drm, + DISPLAY_VER(display) != 6 && + !display->platform.ivybridge); /* PPT is CPT compatible */ return PCH_CPT; case INTEL_PCH_LPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - IS_HASWELL_ULT(dev_priv) || IS_BROADWELL_ULT(dev_priv)); + drm_dbg_kms(display->drm, "Found LynxPoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + display->platform.haswell_ult || + display->platform.broadwell_ult); return PCH_LPT_H; case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL_ULT(dev_priv) && !IS_BROADWELL_ULT(dev_priv)); + 
drm_dbg_kms(display->drm, "Found LynxPoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + !display->platform.haswell_ult && + !display->platform.broadwell_ult); return PCH_LPT_LP; case INTEL_PCH_WPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - IS_HASWELL_ULT(dev_priv) || IS_BROADWELL_ULT(dev_priv)); + drm_dbg_kms(display->drm, "Found WildcatPoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + display->platform.haswell_ult || + display->platform.broadwell_ult); /* WPT is LPT compatible */ return PCH_LPT_H; case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL_ULT(dev_priv) && !IS_BROADWELL_ULT(dev_priv)); + drm_dbg_kms(display->drm, "Found WildcatPoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + !display->platform.haswell_ult && + !display->platform.broadwell_ult); /* WPT is LPT compatible */ return PCH_LPT_LP; case INTEL_PCH_SPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found SunrisePoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake); return PCH_SPT; case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + 
drm_dbg_kms(display->drm, "Found SunrisePoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); return PCH_SPT; case INTEL_PCH_KBP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Kaby Lake PCH (KBP)\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Cannon Lake PCH (CNP)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); return PCH_CNP; case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Found Cannon Lake LP PCH (CNP-LP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: case INTEL_PCH_CMP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv) && - !IS_ROCKETLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Comet Lake PCH (CMP)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake && + !display->platform.rocketlake); /* CMP is CNP compatible */ return PCH_CNP; case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found 
Comet Lake V PCH (CMP-V)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Comet Lake V PCH (CMP-V)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); /* CMP-V is based on KBP, which is SPT compatible */ return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: case INTEL_PCH_ICP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Ice Lake PCH\n"); + drm_WARN_ON(display->drm, !display->platform.icelake); return PCH_ICP; case INTEL_PCH_MCC_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n"); - drm_WARN_ON(&dev_priv->drm, !(IS_JASPERLAKE(dev_priv) || - IS_ELKHARTLAKE(dev_priv))); + drm_dbg_kms(display->drm, "Found Mule Creek Canyon PCH\n"); + drm_WARN_ON(display->drm, !(display->platform.jasperlake || + display->platform.elkhartlake)); /* MCC is TGP compatible */ return PCH_TGP; case INTEL_PCH_TGP_DEVICE_ID_TYPE: case INTEL_PCH_TGP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_TIGERLAKE(dev_priv) && - !IS_ROCKETLAKE(dev_priv) && - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); + drm_dbg_kms(display->drm, "Found Tiger Lake LP PCH\n"); + drm_WARN_ON(display->drm, !display->platform.tigerlake && + !display->platform.rocketlake && + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); return PCH_TGP; case INTEL_PCH_JSP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !(IS_JASPERLAKE(dev_priv) || - IS_ELKHARTLAKE(dev_priv))); + drm_dbg_kms(display->drm, "Found Jasper Lake PCH\n"); + drm_WARN_ON(display->drm, !(display->platform.jasperlake || + 
display->platform.elkhartlake)); /* JSP is ICP compatible */ return PCH_ICP; case INTEL_PCH_ADP_DEVICE_ID_TYPE: case INTEL_PCH_ADP2_DEVICE_ID_TYPE: case INTEL_PCH_ADP3_DEVICE_ID_TYPE: case INTEL_PCH_ADP4_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Alder Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_ALDERLAKE_S(dev_priv) && - !IS_ALDERLAKE_P(dev_priv)); + drm_dbg_kms(display->drm, "Found Alder Lake PCH\n"); + drm_WARN_ON(display->drm, !display->platform.alderlake_s && + !display->platform.alderlake_p); return PCH_ADP; default: return PCH_NONE; @@ -189,7 +200,7 @@ static bool intel_is_virt_pch(unsigned short id, } static void -intel_virt_detect_pch(const struct drm_i915_private *dev_priv, +intel_virt_detect_pch(const struct intel_display *display, unsigned short *pch_id, enum intel_pch *pch_type) { unsigned short id = 0; @@ -201,44 +212,45 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv, * make an educated guess as to which PCH is really there. */ - if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) + if (display->platform.alderlake_s || display->platform.alderlake_p) id = INTEL_PCH_ADP_DEVICE_ID_TYPE; - else if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv)) + else if (display->platform.tigerlake || display->platform.rocketlake) id = INTEL_PCH_TGP_DEVICE_ID_TYPE; - else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) + else if (display->platform.jasperlake || display->platform.elkhartlake) id = INTEL_PCH_MCC_DEVICE_ID_TYPE; - else if (IS_ICELAKE(dev_priv)) + else if (display->platform.icelake) id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) + else if (display->platform.coffeelake || + display->platform.cometlake) id = INTEL_PCH_CNP_DEVICE_ID_TYPE; - else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) + else if (display->platform.kabylake || display->platform.skylake) id = INTEL_PCH_SPT_DEVICE_ID_TYPE; - else if (IS_HASWELL_ULT(dev_priv) || 
IS_BROADWELL_ULT(dev_priv)) + else if (display->platform.haswell_ult || + display->platform.broadwell_ult) id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + else if (display->platform.haswell || display->platform.broadwell) id = INTEL_PCH_LPT_DEVICE_ID_TYPE; - else if (GRAPHICS_VER(dev_priv) == 6 || IS_IVYBRIDGE(dev_priv)) + else if (DISPLAY_VER(display) == 6 || display->platform.ivybridge) id = INTEL_PCH_CPT_DEVICE_ID_TYPE; - else if (GRAPHICS_VER(dev_priv) == 5) + else if (DISPLAY_VER(display) == 5) id = INTEL_PCH_IBX_DEVICE_ID_TYPE; if (id) - drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id); + drm_dbg_kms(display->drm, "Assuming PCH ID %04x\n", id); else - drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n"); + drm_dbg_kms(display->drm, "Assuming no PCH\n"); - *pch_type = intel_pch_type(dev_priv, id); + *pch_type = intel_pch_type(display, id); /* Sanity check virtual PCH id */ - if (drm_WARN_ON(&dev_priv->drm, + if (drm_WARN_ON(display->drm, id && *pch_type == PCH_NONE)) id = 0; *pch_id = id; } -void intel_detect_pch(struct drm_i915_private *dev_priv) +void intel_detect_pch(struct intel_display *display) { struct pci_dev *pch = NULL; unsigned short id; @@ -248,21 +260,21 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) * South display engine on the same PCI device: just assign the fake * PCH. */ - if (DISPLAY_VER(dev_priv) >= 20) { - dev_priv->pch_type = PCH_LNL; + if (DISPLAY_VER(display) >= 20) { + display->pch_type = PCH_LNL; return; - } else if (IS_BATTLEMAGE(dev_priv) || IS_METEORLAKE(dev_priv)) { + } else if (display->platform.battlemage || display->platform.meteorlake) { /* * Both north display and south display are on the SoC die. * The real PCH (if it even exists) is uninvolved in display. 
*/ - dev_priv->pch_type = PCH_MTL; + display->pch_type = PCH_MTL; return; - } else if (IS_DG2(dev_priv)) { - dev_priv->pch_type = PCH_DG2; + } else if (display->platform.dg2) { + display->pch_type = PCH_DG2; return; - } else if (IS_DG1(dev_priv)) { - dev_priv->pch_type = PCH_DG1; + } else if (display->platform.dg1) { + display->pch_type = PCH_DG1; return; } @@ -283,14 +295,14 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) id = pch->device & INTEL_PCH_DEVICE_ID_MASK; - pch_type = intel_pch_type(dev_priv, id); + pch_type = intel_pch_type(display, id); if (pch_type != PCH_NONE) { - dev_priv->pch_type = pch_type; + display->pch_type = pch_type; break; } else if (intel_is_virt_pch(id, pch->subsystem_vendor, pch->subsystem_device)) { - intel_virt_detect_pch(dev_priv, &id, &pch_type); - dev_priv->pch_type = pch_type; + intel_virt_detect_pch(display, &id, &pch_type); + display->pch_type = pch_type; break; } } @@ -299,16 +311,16 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) * Use PCH_NOP (PCH but no South Display) for PCH platforms without * display. 
*/ - if (pch && !HAS_DISPLAY(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, + if (pch && !HAS_DISPLAY(display)) { + drm_dbg_kms(display->drm, "Display disabled, reverting to NOP PCH\n"); - dev_priv->pch_type = PCH_NOP; + display->pch_type = PCH_NOP; } else if (!pch) { - if (i915_run_as_guest() && HAS_DISPLAY(dev_priv)) { - intel_virt_detect_pch(dev_priv, &id, &pch_type); - dev_priv->pch_type = pch_type; + if (i915_run_as_guest() && HAS_DISPLAY(display)) { + intel_virt_detect_pch(display, &id, &pch_type); + display->pch_type = pch_type; } else { - drm_dbg_kms(&dev_priv->drm, "No PCH found.\n"); + drm_dbg_kms(display->drm, "No PCH found.\n"); } } diff --git a/drivers/gpu/drm/i915/display/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h index b9fa2b2f07bce..b2b309a9fe6b8 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.h +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -6,7 +6,9 @@ #ifndef __INTEL_PCH__ #define __INTEL_PCH__ -struct drm_i915_private; +#include "intel_display_conversion.h" + +struct intel_display; /* * Sorted by south display engine compatibility. 
@@ -34,23 +36,23 @@ enum intel_pch { PCH_LNL, }; -#define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) -#define HAS_PCH_DG2(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG2) -#define HAS_PCH_ADP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ADP) -#define HAS_PCH_DG1(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG1) -#define HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP) -#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) -#define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) -#define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) -#define HAS_PCH_LPT_H(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_H) -#define HAS_PCH_LPT_LP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_LP) -#define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_H || \ - INTEL_PCH_TYPE(dev_priv) == PCH_LPT_LP) -#define HAS_PCH_CPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CPT) -#define HAS_PCH_IBX(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_IBX) -#define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) -#define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) - -void intel_detect_pch(struct drm_i915_private *dev_priv); +#define INTEL_PCH_TYPE(_display) (__to_intel_display(_display)->pch_type) +#define HAS_PCH_DG2(display) (INTEL_PCH_TYPE(display) == PCH_DG2) +#define HAS_PCH_ADP(display) (INTEL_PCH_TYPE(display) == PCH_ADP) +#define HAS_PCH_DG1(display) (INTEL_PCH_TYPE(display) == PCH_DG1) +#define HAS_PCH_TGP(display) (INTEL_PCH_TYPE(display) == PCH_TGP) +#define HAS_PCH_ICP(display) (INTEL_PCH_TYPE(display) == PCH_ICP) +#define HAS_PCH_CNP(display) (INTEL_PCH_TYPE(display) == PCH_CNP) +#define HAS_PCH_SPT(display) (INTEL_PCH_TYPE(display) == PCH_SPT) +#define HAS_PCH_LPT_H(display) (INTEL_PCH_TYPE(display) == PCH_LPT_H) +#define HAS_PCH_LPT_LP(display) (INTEL_PCH_TYPE(display) == PCH_LPT_LP) +#define HAS_PCH_LPT(display) (INTEL_PCH_TYPE(display) == PCH_LPT_H || \ + INTEL_PCH_TYPE(display) == PCH_LPT_LP) 
+#define HAS_PCH_CPT(display) (INTEL_PCH_TYPE(display) == PCH_CPT) +#define HAS_PCH_IBX(display) (INTEL_PCH_TYPE(display) == PCH_IBX) +#define HAS_PCH_NOP(display) (INTEL_PCH_TYPE(display) == PCH_NOP) +#define HAS_PCH_SPLIT(display) (INTEL_PCH_TYPE(display) != PCH_NONE) + +void intel_detect_pch(struct intel_display *display); #endif /* __INTEL_PCH__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index f5262b8ad237d..787a9864c687e 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -264,7 +264,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) i915_gem_init_early(dev_priv); /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(dev_priv); + intel_detect_pch(display); intel_irq_init(dev_priv); intel_display_driver_early_probe(display); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65e795a4ecbab..38cd1750bc994 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -271,9 +271,6 @@ struct drm_i915_private { /* pm private clock gating functions */ const struct drm_i915_clock_gating_funcs *clock_gating_funcs; - /* PCH chipset type */ - enum intel_pch pch_type; - unsigned long gem_quirks; struct i915_gem_mm mm; diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 7ddda8f5b1c48..5d137356a5875 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -134,7 +134,7 @@ int xe_display_init_early(struct xe_device *xe) spin_lock_init(&xe->uncore.lock); /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(xe); + intel_detect_pch(display); intel_display_driver_early_probe(display); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 23bcb0ee7afd5..6a287549fb57c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ 
b/drivers/gpu/drm/xe/xe_device_types.h @@ -560,7 +560,6 @@ struct xe_device { * migrating to the right sub-structs */ struct intel_display display; - enum intel_pch pch_type; struct dram_info { bool wm_lv_0_adjust_needed; -- GitLab From 3090ea0344dad526186235cf540dcbaf1c353f11 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 11 Apr 2025 12:54:13 +0300 Subject: [PATCH 270/899] drm/i915/pch: move PCH detection to intel_display_driver_early_probe() Make PCH detection part of display. For now, call it also for !HAS_DISPLAY() to avoid functional changes here. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/de70b35b170c9a74edddb497a209eb10427b77de.1744364975.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_driver.c | 3 +++ drivers/gpu/drm/i915/display/intel_pch.c | 2 +- drivers/gpu/drm/i915/display/intel_pch.h | 2 +- drivers/gpu/drm/i915/i915_driver.c | 3 --- drivers/gpu/drm/xe/display/xe_display.c | 3 --- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index efee8925987e5..e4192f6cb0c75 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -180,6 +180,9 @@ static void intel_plane_possible_crtcs_init(struct intel_display *display) void intel_display_driver_early_probe(struct intel_display *display) { + /* This must be called before any calls to HAS_PCH_* */ + intel_pch_detect(display); + if (!HAS_DISPLAY(display)) return; diff --git a/drivers/gpu/drm/i915/display/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c index e63480dd01fe9..c5045d2527b44 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.c +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -250,7 +250,7 @@ intel_virt_detect_pch(const struct intel_display *display, *pch_id = id; } -void intel_detect_pch(struct intel_display *display) +void 
intel_pch_detect(struct intel_display *display) { struct pci_dev *pch = NULL; unsigned short id; diff --git a/drivers/gpu/drm/i915/display/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h index b2b309a9fe6b8..cf4dab1b98bf0 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.h +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -53,6 +53,6 @@ enum intel_pch { #define HAS_PCH_NOP(display) (INTEL_PCH_TYPE(display) == PCH_NOP) #define HAS_PCH_SPLIT(display) (INTEL_PCH_TYPE(display) != PCH_NONE) -void intel_detect_pch(struct intel_display *display); +void intel_pch_detect(struct intel_display *display); #endif /* __INTEL_PCH__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 787a9864c687e..97ff9855b5de0 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -263,9 +263,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) i915_gem_init_early(dev_priv); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(display); - intel_irq_init(dev_priv); intel_display_driver_early_probe(display); intel_clock_gating_hooks_init(dev_priv); diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 5d137356a5875..762f5c423faf2 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -133,9 +133,6 @@ int xe_display_init_early(struct xe_device *xe) /* Fake uncore lock */ spin_lock_init(&xe->uncore.lock); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(display); - intel_display_driver_early_probe(display); /* Early display init.. */ -- GitLab From 6a5cfab072d6f7b1277d491a17aecf6ea9acbc68 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 11 Apr 2025 12:54:14 +0300 Subject: [PATCH 271/899] drm/i915/pch: clean up includes We no longer need i915_drv.h in intel_pch.c, and we no longer need intel_pch.h universally. 
With intel_pch.h being included from intel_display_core.h, it's still included pretty much everywhere, but there's no need to include it explicitly from i915_drv.h or xe_device_types.h. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/68ec70f6880b7af19bc93b9817959299634a555d.1744364975.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_pch.c | 4 +++- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/xe/xe_device_types.h | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c index c5045d2527b44..fec1919e53867 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.c +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -3,8 +3,10 @@ * Copyright 2025 Intel Corporation. */ -#include "i915_drv.h" +#include + #include "i915_utils.h" +#include "intel_display_core.h" #include "intel_pch.h" #define INTEL_PCH_DEVICE_ID_MASK 0xff80 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38cd1750bc994..c0eec8fe5cad0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -38,7 +38,6 @@ #include "display/intel_display_limits.h" #include "display/intel_display_core.h" -#include "display/intel_pch.h" #include "gem/i915_gem_context_types.h" #include "gem/i915_gem_shrinker.h" diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6a287549fb57c..e620f4f434c82 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -30,7 +30,6 @@ #endif #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "intel_pch.h" #include "intel_display_core.h" #include "intel_display_device.h" #endif -- GitLab From c3abed53ca13f658c08727bd594f3874d216bde5 Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Thu, 10 Apr 2025 12:35:15 +0800 Subject: [PATCH 272/899] drm/amdkfd: Add rec SDMA engines support with limited XGMI This patch adds 
recommended SDMA engines with limited XGMI SDMA engines. It will help improve overall performance for device to device copies with this optimization. v2: Update the formatting issues and data type Signed-off-by: Shane Xiao Suggested-by: Jonathan Kim Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 41 +++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9bbee484d57cc..baa2374acdeb5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, { struct kfd_node *gpu = outbound_link->gpu; struct amdgpu_device *adev = gpu->adev; - int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); + unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); + uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; + uint32_t xgmi_sdma_eng_id_mask = + ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && - kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && - (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && + num_xgmi_nodes == 8); if (support_rec_eng) { int src_socket_id = adev->gmc.xgmi.physical_node_id; int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + unsigned int reshift = num_xgmi_sdma_engines == 6 ? 
1 : 0; outbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift); inbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; - } else { - int num_sdma_eng = kfd_get_num_sdma_engines(gpu); - int i, eng_offset = 0; + 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift); - if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { - eng_offset = num_sdma_eng; - num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); - } + /* If recommended engine is out of range, need to reset the mask */ + if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; - for (i = 0; i < num_sdma_eng; i++) { - outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - } + } else { + uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : + sdma_eng_id_mask; + + outbound_link->rec_sdma_eng_id_mask = engine_mask; + inbound_link->rec_sdma_eng_id_mask = engine_mask; } } -- GitLab From 7a429a14f852c6a8cd4710078613cf2bd2a4eb6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Wed, 9 Apr 2025 08:49:09 +0300 Subject: [PATCH 273/899] drm/i915/vrr: Stop writing VRR_CTL_IGN_MAX_SHIFT for MTL onwards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Bspec VRR_CTL_IGN_MAX_SHIFT doesn't exist for MTL and onwards. On LunarLake and onwards Bit 30 is "Mask Block PkgC" instead. 
Stop writing the bit for MeteorLake and onwards v2: "Ignore Max Shift" bit doesn't exist on MeteorLake either Bspec: 50508, 68925 Signed-off-by: Jouni Högander Reviewed-by: Ankit Nautiyal Link: https://lore.kernel.org/r/20250409054909.968531-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 633a66f6b73be..c6565baf815a1 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -444,7 +444,10 @@ static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(display) >= 13) + if (DISPLAY_VER(display) >= 14) + return VRR_CTL_FLIP_LINE_EN | + XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); + else if (DISPLAY_VER(display) >= 13) return VRR_CTL_IGN_MAX_SHIFT | VRR_CTL_FLIP_LINE_EN | XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); else -- GitLab From d3815ae24f25fea2f94c99c975da05b0a521f6c2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 11 Apr 2025 13:27:15 +0300 Subject: [PATCH 274/899] drm/i915/dpio: have chv_data_lane_soft_reset() get/put dpio internally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have chv_data_lane_soft_reset() get/put dpio internally, and use a locked version of it within intel_dpio_phy.c. This drops the dependency on vlv sideband from g4x_dp.c and g4x_hdmi.c, and makes that a DPIO PHY implementation detail. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250411102715.613082-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/g4x_dp.c | 7 ------- drivers/gpu/drm/i915/display/g4x_hdmi.c | 8 ------- drivers/gpu/drm/i915/display/intel_dpio_phy.c | 21 ++++++++++++++----- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index b39aae9165df6..18e51799d2a67 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -28,7 +28,6 @@ #include "intel_hotplug.h" #include "intel_pch_display.h" #include "intel_pps.h" -#include "vlv_sideband.h" static const struct dpll g4x_dpll[] = { { .dot = 162000, .p1 = 2, .p2 = 10, .n = 2, .m1 = 23, .m2 = 8, }, @@ -581,16 +580,10 @@ static void chv_post_disable_dp(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - intel_dp_link_down(encoder, old_crtc_state); - vlv_dpio_get(dev_priv); - /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - - vlv_dpio_put(dev_priv); } static void diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 3dc2c59a3df04..21b5db2fa203f 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -22,7 +22,6 @@ #include "intel_hdmi.h" #include "intel_hotplug.h" #include "intel_sdvo.h" -#include "vlv_sideband.h" static void intel_hdmi_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) @@ -539,15 +538,8 @@ static void chv_hdmi_post_disable(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private 
*dev_priv = to_i915(display->drm); - - vlv_dpio_get(dev_priv); - /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - - vlv_dpio_put(dev_priv); } static void chv_hdmi_pre_enable(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 429f895437890..1e1af71507235 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -808,9 +808,9 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_put(dev_priv); } -void chv_data_lane_soft_reset(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - bool reset) +static void __chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); @@ -853,6 +853,17 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, } } +void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool reset) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + vlv_dpio_get(i915); + __chv_data_lane_soft_reset(encoder, crtc_state, reset); + vlv_dpio_put(i915); +} + void chv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -880,7 +891,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, vlv_dpio_get(dev_priv); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, crtc_state, true); + __chv_data_lane_soft_reset(encoder, crtc_state, true); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -1008,7 +1019,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, } /* Deassert data lane reset */ - chv_data_lane_soft_reset(encoder, crtc_state, false); + __chv_data_lane_soft_reset(encoder, 
crtc_state, false); vlv_dpio_put(dev_priv); } -- GitLab From 2eb0e67ef063835b3fa5a8b8feaf6beae024b060 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:43 +0300 Subject: [PATCH 275/899] drm/i915: use 32-bit access for gen2 irq registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've previously switched from 16-bit to 32-bit access for gen2 irq registers, but one was left behind. Fix it. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/5a56286c94e08a02435c60ce0fbff13aca6c0d1f.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 819ab933bb105..df16c2b86b9da 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1782,8 +1782,6 @@ static void gt_record_display_regs(struct intel_gt_coredump *gt) gt->ier = intel_uncore_read(uncore, VLV_IER); else if (HAS_PCH_SPLIT(i915)) gt->ier = intel_uncore_read(uncore, DEIER); - else if (GRAPHICS_VER(i915) == 2) - gt->ier = intel_uncore_read16(uncore, GEN2_IER); else gt->ier = intel_uncore_read(uncore, GEN2_IER); } -- GitLab From ef32101873f477fdfb5ac6e9cfed7c9c571636c9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:44 +0300 Subject: [PATCH 276/899] drm/i915: record GEN2_IER in gtier[0] for pre-ilk error capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pre-ilk platforms the engine interrupts live in GEN2_IER. Capture it as part of gtier instead of display. 
Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/f637219fe3accb69963266773b9ef7c1131875e4.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gpu_error.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index df16c2b86b9da..b58bdb4b962bc 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1782,8 +1782,6 @@ static void gt_record_display_regs(struct intel_gt_coredump *gt) gt->ier = intel_uncore_read(uncore, VLV_IER); else if (HAS_PCH_SPLIT(i915)) gt->ier = intel_uncore_read(uncore, DEIER); - else - gt->ier = intel_uncore_read(uncore, GEN2_IER); } /* Capture all other registers that GuC doesn't capture. */ @@ -1822,6 +1820,9 @@ static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) } else if (HAS_PCH_SPLIT(i915)) { gt->gtier[0] = intel_uncore_read(uncore, GTIER); gt->ngtier = 1; + } else { + gt->gtier[0] = intel_uncore_read(uncore, GEN2_IER); + gt->ngtier = 1; } gt->eir = intel_uncore_read(uncore, EIR); -- GitLab From 79cef51541eab6294da2b1a0f576dded66871064 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:45 +0300 Subject: [PATCH 277/899] drm/i915: stop recording IER in error capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With pre-ilk GEN2_IER capture moved to gtier[0], the remaining IER aren't all that relevant. Stop capturing them. 
Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/6f1130a3d0d13e08a73ba381225ab978b22a9345.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gpu_error.c | 8 -------- drivers/gpu/drm/i915/i915_gpu_error.h | 1 - 2 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b58bdb4b962bc..cb11c10d922ac 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -729,7 +729,6 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m, static void err_print_gt_display(struct drm_i915_error_state_buf *m, struct intel_gt_coredump *gt) { - err_printf(m, "IER: 0x%08x\n", gt->ier); err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); } @@ -1775,13 +1774,6 @@ static void gt_record_display_regs(struct intel_gt_coredump *gt) if (DISPLAY_VER(i915) >= 6 && DISPLAY_VER(i915) < 20) gt->derrmr = intel_uncore_read(uncore, DERRMR); - - if (GRAPHICS_VER(i915) >= 8) - gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); - else if (IS_VALLEYVIEW(i915)) - gt->ier = intel_uncore_read(uncore, VLV_IER); - else if (HAS_PCH_SPLIT(i915)) - gt->ier = intel_uncore_read(uncore, DEIER); } /* Capture all other registers that GuC doesn't capture. 
*/ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 749e1c55613e8..ac55603a81d76 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -146,7 +146,6 @@ struct intel_gt_coredump { /* Generic register state */ u32 eir; u32 pgtbl_er; - u32 ier; u32 gtier[6], ngtier; u32 forcewake; u32 error; /* gen6+ */ -- GitLab From 737c725b2c88c4defd3fc92bb934ee6804a4d851 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:46 +0300 Subject: [PATCH 278/899] drm/i915: use display snapshot mechanism for display irq regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move more display specific parts of GPU error logging behind the display snapshot interface. With the display register capture reduced to just one register, DERRMR, there's quite a bit of boilerplate here. However, it's still a nice abstraction and removes a DISPLAY_VER() usage from core i915. With this approach, it's also easy to add to xe as needed. 
v2: Remove stale comment Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/13206969df04426d290d2863dc574e22ca45193a.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_display_irq.c | 28 +++++++++++++++++++ .../gpu/drm/i915/display/intel_display_irq.h | 5 ++++ .../drm/i915/display/intel_display_snapshot.c | 5 ++++ drivers/gpu/drm/i915/i915_gpu_error.c | 18 ------------ drivers/gpu/drm/i915/i915_gpu_error.h | 2 -- 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index d2a35e3630b1d..f5dc050a63441 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -2329,3 +2329,31 @@ void intel_display_irq_init(struct intel_display *display) INIT_WORK(&display->irq.vblank_dc_work, intel_display_vblank_dc_work); } + +struct intel_display_irq_snapshot { + u32 derrmr; +}; + +struct intel_display_irq_snapshot * +intel_display_irq_snapshot_capture(struct intel_display *display) +{ + struct intel_display_irq_snapshot *snapshot; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + if (!snapshot) + return NULL; + + if (DISPLAY_VER(display) >= 6 && DISPLAY_VER(display) < 20) + snapshot->derrmr = intel_de_read(display, DERRMR); + + return snapshot; +} + +void intel_display_irq_snapshot_print(const struct intel_display_irq_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + drm_printf(p, "DERRMR: 0x%08x\n", snapshot->derrmr); +} diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index f72727768351e..4f0e27a37854a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -12,7 +12,9 @@ enum pipe; struct drm_crtc; +struct drm_printer; struct intel_display; +struct intel_display_irq_snapshot; void 
valleyview_enable_display_irqs(struct intel_display *display); void valleyview_disable_display_irqs(struct intel_display *display); @@ -82,4 +84,7 @@ void intel_display_irq_init(struct intel_display *display); void i915gm_irq_cstate_wa(struct intel_display *display, bool enable); +struct intel_display_irq_snapshot *intel_display_irq_snapshot_capture(struct intel_display *display); +void intel_display_irq_snapshot_print(const struct intel_display_irq_snapshot *snapshot, struct drm_printer *p); + #endif /* __INTEL_DISPLAY_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_snapshot.c b/drivers/gpu/drm/i915/display/intel_display_snapshot.c index 25ba043cbb659..66087302fdbc9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_snapshot.c +++ b/drivers/gpu/drm/i915/display/intel_display_snapshot.c @@ -7,6 +7,7 @@ #include "intel_display_core.h" #include "intel_display_device.h" +#include "intel_display_irq.h" #include "intel_display_params.h" #include "intel_display_snapshot.h" #include "intel_dmc.h" @@ -20,6 +21,7 @@ struct intel_display_snapshot { struct intel_display_params params; struct intel_overlay_snapshot *overlay; struct intel_dmc_snapshot *dmc; + struct intel_display_irq_snapshot *irq; }; struct intel_display_snapshot *intel_display_snapshot_capture(struct intel_display *display) @@ -38,6 +40,7 @@ struct intel_display_snapshot *intel_display_snapshot_capture(struct intel_displ intel_display_params_copy(&snapshot->params); + snapshot->irq = intel_display_irq_snapshot_capture(display); snapshot->overlay = intel_overlay_snapshot_capture(display); snapshot->dmc = intel_dmc_snapshot_capture(display); @@ -57,6 +60,7 @@ void intel_display_snapshot_print(const struct intel_display_snapshot *snapshot, intel_display_device_info_print(&snapshot->info, &snapshot->runtime_info, p); intel_display_params_dump(&snapshot->params, display->drm->driver->name, p); + intel_display_irq_snapshot_print(snapshot->irq, p); 
intel_overlay_snapshot_print(snapshot->overlay, p); intel_dmc_snapshot_print(snapshot->dmc, p); } @@ -68,6 +72,7 @@ void intel_display_snapshot_free(struct intel_display_snapshot *snapshot) intel_display_params_free(&snapshot->params); + kfree(snapshot->irq); kfree(snapshot->overlay); kfree(snapshot->dmc); kfree(snapshot); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cb11c10d922ac..d8277b51b2504 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -726,12 +726,6 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m, intel_sseu_print_topology(gt->_gt->i915, &gt->info.sseu, &p); } -static void err_print_gt_display(struct drm_i915_error_state_buf *m, - struct intel_gt_coredump *gt) -{ - err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); -} - static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m, struct intel_gt_coredump *gt) { @@ -877,7 +871,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (error->gt->uc && error->gt->uc->guc.is_guc_capture) print_guc_capture = true; - err_print_gt_display(m, error->gt); err_print_gt_global_nonguc(m, error->gt); err_print_gt_fences(m, error->gt); @@ -1766,16 +1759,6 @@ gt_record_uc(struct intel_gt_coredump *gt, return error_uc; } -/* Capture display registers. */ -static void gt_record_display_regs(struct intel_gt_coredump *gt) -{ - struct intel_uncore *uncore = gt->_gt->uncore; - struct drm_i915_private *i915 = uncore->i915; - - if (DISPLAY_VER(i915) >= 6 && DISPLAY_VER(i915) < 20) - gt->derrmr = intel_uncore_read(uncore, DERRMR); -} - /* Capture all other registers that GuC doesn't capture.
*/ static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) { @@ -2034,7 +2017,6 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) gc->_gt = gt; gc->awake = intel_gt_pm_is_awake(gt); - gt_record_display_regs(gc); gt_record_global_nonguc_regs(gc); /* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ac55603a81d76..182324979278c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -163,8 +163,6 @@ struct intel_gt_coredump { u32 clock_frequency; u32 clock_period_ns; - /* Display related */ - u32 derrmr; u32 sfc_done[I915_MAX_SFC]; /* gen12 */ u32 nfence; -- GitLab From 02cfe5a3bcb8768677a68e86016a5c7e138cd7ce Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:47 +0300 Subject: [PATCH 279/899] drm/i915: don't capture DERRMR for VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DERRMR isn't valid for VLV/CHV. Don't capture it for them. 
Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/4563cc7eb567ac508b84717c3708a4e48aa8b7bb.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index f5dc050a63441..5d07b6a9e59e3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -2343,7 +2343,7 @@ intel_display_irq_snapshot_capture(struct intel_display *display) if (!snapshot) return NULL; - if (DISPLAY_VER(display) >= 6 && DISPLAY_VER(display) < 20) + if (DISPLAY_VER(display) >= 6 && DISPLAY_VER(display) < 20 && !HAS_GMCH(display)) snapshot->derrmr = intel_de_read(display, DERRMR); return snapshot; -- GitLab From 012aa48a587c3b263f62256a68438ec44fb0225d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 14 Apr 2025 14:29:48 +0300 Subject: [PATCH 280/899] drm/i915: use graphics version instead of PCH split in error capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid using PCH checks in core i915 code, in preparation for moving PCH handling to display. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/8d73eb1d56603210003554bc6a875c53ed4c692a.1744630147.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d8277b51b2504..2dcc869c1332b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1792,7 +1792,7 @@ static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) gt->gtier[i] = intel_uncore_read(uncore, GEN8_GT_IER(i)); gt->ngtier = 4; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (GRAPHICS_VER(i915) >= 5) { gt->gtier[0] = intel_uncore_read(uncore, GTIER); gt->ngtier = 1; } else { -- GitLab From 6998cfce0e1db58c730d08cadc6bfd71e26e2de0 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Mon, 14 Apr 2025 14:27:01 +0530 Subject: [PATCH 281/899] drm/i915/display: Add macro for checking 3 DSC engines 3 DSC engines per pipe is currently supported only for BMG. Add a macro to check whether a platform supports 3 DSC engines per pipe. v2:Fix Typo in macro argument. (Suraj). Added fixes tag. 
Bspec: 50175 Fixes: be7f5fcdf4a0 ("drm/i915/dp: Enable 3 DSC engines for 12 slices") Cc: Ankit Nautiyal Cc: Suraj Kandpal Cc: # v6.14+ Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250414085701.2802374-1-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 368b0d3417c26..87c666792c0da 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -163,6 +163,7 @@ struct intel_display_platforms { #define HAS_DP_MST(__display) (DISPLAY_INFO(__display)->has_dp_mst) #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) +#define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display)) #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30) -- GitLab From da9b1c61e7f7b327dd70c5f073ba04d419a55ef8 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Mon, 14 Apr 2025 08:12:56 +0530 Subject: [PATCH 282/899] drm/i915/dp: Check for HAS_DSC_3ENGINES while configuring DSC slices DSC 12 slices configuration is used for some specific cases with Ultrajoiner. This can be supported only when each of the 4 joined pipes has 3 DSC engines. Add the missing check for 3 DSC engines support before using 3 DSC slices per pipe.
Fixes: be7f5fcdf4a0 ("drm/i915/dp: Enable 3 DSC engines for 12 slices") Cc: Ankit Nautiyal Cc: Suraj Kandpal Cc: # v6.14+ Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250414024256.2782702-3-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index aeb14a5455fd1..d7a30d0992b7a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1050,10 +1050,11 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; /* - * 3 DSC Slices per pipe need 3 DSC engines, - * which is supported only with Ultrajoiner. + * 3 DSC Slices per pipe need 3 DSC engines, which is supported only + * with Ultrajoiner only for some platforms. */ - if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) + if (valid_dsc_slicecount[i] == 3 && + (!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4)) continue; if (test_slice_count > -- GitLab From 0e96a9b9470647fd5165ec0f971cd30f6d0fd9dc Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Wed, 9 Apr 2025 16:02:14 -0700 Subject: [PATCH 283/899] drm/i915/display: Add link rate and lane count to i915_display_info Adding link rate and lane count information to i915_display_info makes it easier and faster to access this data compared to checking kernel logs. This is particularly beneficial for individuals who are not familiar with i915 in the following scenarios: * Debugging DP tunnel bandwidth usage in the Thunderbolt driver. * During USB4 certification, it is necessary to know the link rate used by the monitor to prove that the DP tunnel can handle required rates. 
* In PHY CTS, when the connector probes are not mounted correctly, some display lanes may not appear in the DP Oscilloscope, leading to CTS failures. This change provides validation teams with an easy way to identify and troubleshoot issues. v2: separate seq_printf line (Jani) v3: separate output line (Jani) Cc: Imre Deak Cc: Jani Nikula Signed-off-by: Khaled Almahallawy Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250409230214.963999-1-khaled.almahallawy@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 4c208fdb91375..3f7c605d47d3a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -558,6 +558,8 @@ static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) seq_printf(m, "\tpipe src=" DRM_RECT_FMT ", dither=%s, bpp=%d\n", DRM_RECT_ARG(&crtc_state->pipe_src), str_yes_no(crtc_state->dither), crtc_state->pipe_bpp); + seq_printf(m, "\tport_clock=%d, lane_count=%d\n", + crtc_state->port_clock, crtc_state->lane_count); intel_scaler_info(m, crtc); -- GitLab From 27dbba9f5476f9a94598773aab0ab9229832ec27 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:47:02 +0300 Subject: [PATCH 284/899] drm/i915/irq: convert ibx_irq_reset() into ibx_display_irq_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Observe that ibx_irq_reset() is really ibx_display_irq_reset(). Make it so. Move to display, and call it directly from gen8_display_irq_reset() instead of gen8_irq_reset(). Remove a nearby ancient stale comment while at it. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250409184702.3790548-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_display_irq.c | 17 ++++++++++++++ .../gpu/drm/i915/display/intel_display_irq.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 22 ++----------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 5d07b6a9e59e3..d28ad70a35380 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1960,8 +1960,22 @@ void vlv_display_irq_postinstall(struct intel_display *display) intel_display_irq_regs_init(display, VLV_IRQ_REGS, dev_priv->irq_mask, enable_mask); } +void ibx_display_irq_reset(struct intel_display *display) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (HAS_PCH_NOP(i915)) + return; + + gen2_irq_reset(to_intel_uncore(display->drm), SDE_IRQ_REGS); + + if (HAS_PCH_CPT(i915) || HAS_PCH_LPT(i915)) + intel_de_write(display, SERR_INT, 0xffffffff); +} + void gen8_display_irq_reset(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; if (!HAS_DISPLAY(display)) @@ -1977,6 +1991,9 @@ void gen8_display_irq_reset(struct intel_display *display) intel_display_irq_regs_reset(display, GEN8_DE_PORT_IRQ_REGS); intel_display_irq_regs_reset(display, GEN8_DE_MISC_IRQ_REGS); + + if (HAS_PCH_SPLIT(i915)) + ibx_display_irq_reset(display); } void gen11_display_irq_reset(struct intel_display *display) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index 4f0e27a37854a..5422426c6843d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -56,6 +56,7 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl); void 
gen11_gu_misc_irq_handler(struct intel_display *display, const u32 iir); void i9xx_display_irq_reset(struct intel_display *display); +void ibx_display_irq_reset(struct intel_display *display); void vlv_display_irq_reset(struct intel_display *display); void gen8_display_irq_reset(struct intel_display *display); void gen11_display_irq_reset(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c1f938a1da44e..d06694d6531e7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -658,23 +658,9 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -static void ibx_irq_reset(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - - if (HAS_PCH_NOP(dev_priv)) - return; - - gen2_irq_reset(uncore, SDE_IRQ_REGS); - - if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - intel_uncore_write(&dev_priv->uncore, SERR_INT, 0xffffffff); -} - -/* drm_dma.h hooks -*/ static void ilk_irq_reset(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_uncore *uncore = &dev_priv->uncore; gen2_irq_reset(uncore, DE_IRQ_REGS); @@ -690,7 +676,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv) gen5_gt_irq_reset(to_gt(dev_priv)); - ibx_irq_reset(dev_priv); + ibx_display_irq_reset(display); } static void valleyview_irq_reset(struct drm_i915_private *dev_priv) @@ -717,10 +703,6 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) gen8_gt_irq_reset(to_gt(dev_priv)); gen8_display_irq_reset(display); gen2_irq_reset(uncore, GEN8_PCU_IRQ_REGS); - - if (HAS_PCH_SPLIT(dev_priv)) - ibx_irq_reset(dev_priv); - } static void gen11_irq_reset(struct drm_i915_private *dev_priv) -- GitLab From b484c1e225a6a582fc78c4d7af7b286408bb7d41 Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Tue, 15 Apr 2025 12:06:16 +0300 Subject: [PATCH 285/899] drm/i915/pxp: fix undefined reference to 
`intel_pxp_gsccs_is_ready_for_sessions' On x86_64 with gcc version 13.3.0, I compile the kernel with: make defconfig ./scripts/kconfig/merge_config.sh .config <( echo CONFIG_COMPILE_TEST=y ) make KCFLAGS="-fno-inline-functions -fno-inline-small-functions -fno-inline-functions-called-once" Then I get a linker error: ld: vmlinux.o: in function `pxp_fw_dependencies_completed': kintel_pxp.c:(.text+0x95728f): undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' This is caused by not having an intel_pxp_gsccs_is_ready_for_sessions() header stub for CONFIG_DRM_I915_PXP=n. Add it. Signed-off-by: Chen Linxuan Fixes: 99afb7cc8c44 ("drm/i915/pxp: Add ARB session creation and cleanup") Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250415090616.2649889-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da31..4969d3de2bac3 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ -- GitLab From 182f7443e53d4c633e1b47bdca57600a91c9bbb8 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:42:59
+0200 Subject: [PATCH 286/899] dt-bindings: display: mediatek: Add binding for HDMIv2 DDC Add a binding for the Display Data Channel (DDC) IP in MediaTek SoCs with version 2 HDMI TX IP. Reviewed-by: Rob Herring (Arm) Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-2-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- .../mediatek/mediatek,mt8195-hdmi-ddc.yaml | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml new file mode 100644 index 0000000000000..bde4dc556d4f9 --- /dev/null +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek HDMI MT8195 series HDMI Display Data Channel (DDC) + +maintainers: + - AngeloGioacchino Del Regno + - CK Hu + +properties: + compatible: + oneOf: + - const: mediatek,mt8195-hdmi-ddc + - items: + - const: mediatek,mt8188-hdmi-ddc + - const: mediatek,mt8195-hdmi-ddc + + clocks: + maxItems: 1 + + power-domains: + maxItems: 1 + +required: + - compatible + - clocks + +additionalProperties: false + +examples: + - | + hdmi { + hdmi_ddc: i2c { + compatible = "mediatek,mt8195-hdmi-ddc"; + clocks = <&clk26m>; + }; + }; +... 
-- GitLab From 8745e78c8a06b817a9a315817eb83090c92bfd9b Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:00 +0200 Subject: [PATCH 287/899] dt-bindings: display: mediatek: Add binding for MT8195 HDMI-TX v2 Add a binding for the HDMI TX v2 Encoder found in MediaTek MT8195 and MT8188 SoCs. This fully supports the HDMI Specification 2.0b, hence it provides support for 3D-HDMI, Polarity inversion, up to 16 bits Deep Color, color spaces including RGB444, YCBCR420/422/444 (ITU601/ITU709) and xvYCC, with output resolutions up to 3840x2160p@60Hz. Moreover, it also supports HDCP 1.4 and 2.3, Variable Refresh Rate (VRR) and Consumer Electronics Control (CEC). This IP also includes support for HDMI Audio, including IEC60958 and IEC61937 SPDIF, 8-channel PCM, DSD, and other lossless audio according to HDMI 2.0. Reviewed-by: Rob Herring (Arm) Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-3-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- .../mediatek/mediatek,mt8195-hdmi.yaml | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml new file mode 100644 index 0000000000000..1b382f99d3ced --- /dev/null +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek MT8195 series HDMI-TX Encoder + +maintainers: + - AngeloGioacchino Del Regno + - CK Hu + +description: + The MediaTek HDMI-TX v2 encoder 
can generate HDMI format data based on + the HDMI Specification 2.0b. + +properties: + compatible: + enum: + - mediatek,mt8188-hdmi-tx + - mediatek,mt8195-hdmi-tx + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: HDMI Peripheral Bus (APB) clock + - description: HDCP and HDMI_TOP clock + - description: HDCP, HDMI_TOP and HDMI Audio reference clock + - description: VPP HDMI Split clock + + clock-names: + items: + - const: bus + - const: hdcp + - const: hdcp24m + - const: hdmi-split + + i2c: + type: object + $ref: /schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml + unevaluatedProperties: false + description: HDMI DDC I2C controller + + phys: + maxItems: 1 + description: PHY providing clocking TMDS and pixel to controller + + phy-names: + items: + - const: hdmi + + power-domains: + maxItems: 1 + + '#sound-dai-cells': + const: 1 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Input port, usually connected to the output port of a DPI + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: + Output port that must be connected either to the input port of + a HDMI connector node containing a ddc-i2c-bus, or to the input + port of an attached bridge chip, such as a SlimPort transmitter. 
+ + required: + - port@0 + - port@1 + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - phys + - phy-names + - ports + +allOf: + - $ref: /schemas/sound/dai-common.yaml# + +additionalProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + hdmi@1c300000 { + compatible = "mediatek,mt8195-hdmi-tx"; + reg = <0 0x1c300000 0 0x1000>; + clocks = <&topckgen CLK_TOP_HDMI_APB>, + <&topckgen CLK_TOP_HDCP>, + <&topckgen CLK_TOP_HDCP_24M>, + <&vppsys1 CLK_VPP1_VPP_SPLIT_HDMI>; + clock-names = "bus", "hdcp", "hdcp24m", "hdmi-split"; + interrupts = ; + phys = <&hdmi_phy>; + phy-names = "hdmi"; + power-domains = <&spm MT8195_POWER_DOMAIN_HDMI_TX>; + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_pins>; + #sound-dai-cells = <1>; + + hdmitx_ddc: i2c { + compatible = "mediatek,mt8195-hdmi-ddc"; + clocks = <&clk26m>; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + hdmi_in: endpoint { + remote-endpoint = <&dpi1_out>; + }; + }; + + port@1 { + reg = <1>; + + hdmi_out: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; + }; -- GitLab From 1b0965f495cdd678ee95caeccd85c6afb3c4fea1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 15 Apr 2025 12:43:01 +0200 Subject: [PATCH 288/899] drm/mediatek/hdmi: Use syscon_regmap_lookup_by_phandle_args Use syscon_regmap_lookup_by_phandle_args() which is a wrapper over syscon_regmap_lookup_by_phandle() combined with getting the syscon argument. Except simpler code this annotates within one line that given phandle has arguments, so grepping for code would be easier. There is also no real benefit in printing errors on missing syscon argument, because this is done just too late: runtime check on static/build-time data. Dtschema and Devicetree bindings offer the static/build-time check for this already. 
Signed-off-by: Krzysztof Kozlowski Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-4-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 06e4fac152b77..1358a5095fde4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1447,15 +1447,11 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, * MMSYS_CONFIG device and the register offset of the HDMI_SYS_CFG * registers it contains. */ - regmap = syscon_regmap_lookup_by_phandle(np, "mediatek,syscon-hdmi"); - ret = of_property_read_u32_index(np, "mediatek,syscon-hdmi", 1, - &hdmi->sys_offset); - if (IS_ERR(regmap)) - ret = PTR_ERR(regmap); - if (ret) { - dev_err(dev, - "Failed to get system configuration registers: %d\n", - ret); + regmap = syscon_regmap_lookup_by_phandle_args(np, "mediatek,syscon-hdmi", + 1, &hdmi->sys_offset); + if (IS_ERR(regmap)) { + ret = dev_err_probe(dev, PTR_ERR(regmap), + "Failed to get system configuration registers\n"); goto put_device; } hdmi->sys_regmap = regmap; -- GitLab From 8dcb54d440d014fdb560e9bc38684fabcd67d872 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:42 +0300 Subject: [PATCH 289/899] drm/i915/fb: convert intel_fbdev.[ch] and intel_fbdev_fb.[ch] to struct intel_display Going forward, struct intel_display is the main display device data pointer. Convert intel_fbdev.[ch] and as much as possible of intel_fbdev_fb.[ch] to struct intel_display. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/49651754f3716041f97984e47c15d331851870a5.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_display_driver.c | 3 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 74 +++++++++---------- drivers/gpu/drm/i915/display/intel_fbdev.h | 6 +- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 13 ++-- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 4 +- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 6 +- 6 files changed, 51 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index e4192f6cb0c75..7faddbc6ad9a0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -528,7 +528,6 @@ int intel_display_driver_probe(struct intel_display *display) void intel_display_driver_register(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct drm_printer p = drm_dbg_printer(display->drm, DRM_UT_KMS, "i915 display info:"); @@ -555,7 +554,7 @@ void intel_display_driver_register(struct intel_display *display) drm_kms_helper_poll_init(display->drm); intel_hpd_poll_disable(display); - intel_fbdev_setup(i915); + intel_fbdev_setup(display); intel_display_device_info_print(DISPLAY_INFO(display), DISPLAY_RUNTIME_INFO(display), &p); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 369f46286e95a..2dc4029d71ed6 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -47,9 +47,9 @@ #include #include -#include "i915_drv.h" #include "i915_vma.h" #include "intel_bo.h" +#include "intel_display_core.h" #include "intel_display_rpm.h" #include "intel_display_types.h" #include "intel_fb.h" @@ -66,9 +66,9 @@ struct intel_fbdev { static struct intel_fbdev *to_intel_fbdev(struct drm_fb_helper 
*fb_helper) { - struct drm_i915_private *i915 = to_i915(fb_helper->client.dev); + struct intel_display *display = to_intel_display(fb_helper->client.dev); - return i915->display.fbdev.fbdev; + return display->fbdev.fbdev; } static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev) @@ -210,11 +210,9 @@ static const struct drm_fb_helper_funcs intel_fb_helper_funcs = { int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { + struct intel_display *display = to_intel_display(helper->dev); struct intel_fbdev *ifbdev = to_intel_fbdev(helper); struct intel_framebuffer *fb = ifbdev->fb; - struct drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_display *display = to_intel_display(dev); struct ref_tracker *wakeref; struct fb_info *info; struct i915_vma *vma; @@ -228,7 +226,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, if (fb && (sizes->fb_width > fb->base.width || sizes->fb_height > fb->base.height)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "BIOS fb too small (%dx%d), we require (%dx%d)," " releasing it\n", fb->base.width, fb->base.height, @@ -236,14 +234,14 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, drm_framebuffer_put(&fb->base); fb = NULL; } - if (!fb || drm_WARN_ON(dev, !intel_fb_bo(&fb->base))) { - drm_dbg_kms(&dev_priv->drm, + if (!fb || drm_WARN_ON(display->drm, !intel_fb_bo(&fb->base))) { + drm_dbg_kms(display->drm, "no BIOS fb, allocating a new one\n"); fb = intel_fbdev_fb_alloc(helper, sizes); if (IS_ERR(fb)) return PTR_ERR(fb); } else { - drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n"); + drm_dbg_kms(display->drm, "re-using BIOS fb\n"); prealloc = true; sizes->fb_width = fb->base.width; sizes->fb_height = fb->base.height; @@ -267,7 +265,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, info = drm_fb_helper_alloc_info(helper); if (IS_ERR(info)) { - 
drm_err(&dev_priv->drm, "Failed to allocate fb_info (%pe)\n", info); + drm_err(display->drm, "Failed to allocate fb_info (%pe)\n", info); ret = PTR_ERR(info); goto out_unpin; } @@ -279,11 +277,11 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, obj = intel_fb_bo(&fb->base); - ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma); + ret = intel_fbdev_fb_fill_info(display, info, obj, vma); if (ret) goto out_unpin; - drm_fb_helper_fill_info(info, dev->fb_helper, sizes); + drm_fb_helper_fill_info(info, display->drm->fb_helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. * If the object is stolen however, it will be full of whatever @@ -294,7 +292,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - drm_dbg_kms(&dev_priv->drm, "allocated %dx%d fb: 0x%08x\n", + drm_dbg_kms(display->drm, "allocated %dx%d fb: 0x%08x\n", fb->base.width, fb->base.height, i915_ggtt_offset(vma)); ifbdev->fb = fb; @@ -322,16 +320,15 @@ out_unlock: * Note we only support a single fb shared across pipes for boot (mostly for * fbcon), so we just find the biggest and use that. 
*/ -static bool intel_fbdev_init_bios(struct drm_device *dev, +static bool intel_fbdev_init_bios(struct intel_display *display, struct intel_fbdev *ifbdev) { - struct drm_i915_private *i915 = to_i915(dev); struct intel_framebuffer *fb = NULL; struct intel_crtc *crtc; unsigned int max_size = 0; /* Find the largest fb */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -341,21 +338,21 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, struct drm_gem_object *obj = intel_fb_bo(plane_state->uapi.fb); if (!crtc_state->uapi.active) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] not active, skipping\n", crtc->base.base.id, crtc->base.name); continue; } if (!obj) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] no fb, skipping\n", plane->base.base.id, plane->base.name); continue; } if (obj->size > max_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "found possible fb from [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fb = to_intel_framebuffer(plane_state->uapi.fb); @@ -364,13 +361,13 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } if (!fb) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "no active fbs found, not using BIOS config\n"); goto out; } /* Now make sure all the pipes will fit into it */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -378,13 +375,13 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, unsigned int cur_size; if (!crtc_state->uapi.active) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] not active, skipping\n", crtc->base.base.id, crtc->base.name); continue; } - drm_dbg_kms(&i915->drm, "checking [PLANE:%d:%s] for BIOS fb\n", + 
drm_dbg_kms(display->drm, "checking [PLANE:%d:%s] for BIOS fb\n", plane->base.base.id, plane->base.name); /* @@ -395,7 +392,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size = crtc_state->uapi.adjusted_mode.crtc_hdisplay; cur_size = cur_size * fb->base.format->cpp[0]; if (fb->base.pitches[0] < cur_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb not wide enough for [PLANE:%d:%s] (%d vs %d)\n", plane->base.base.id, plane->base.name, cur_size, fb->base.pitches[0]); @@ -406,7 +403,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size = crtc_state->uapi.adjusted_mode.crtc_vdisplay; cur_size = intel_fb_align_height(&fb->base, 0, cur_size); cur_size *= fb->base.pitches[0]; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] area: %dx%d, bpp: %d, size: %d\n", crtc->base.base.id, crtc->base.name, crtc_state->uapi.adjusted_mode.crtc_hdisplay, @@ -415,7 +412,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size); if (cur_size > max_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb not big enough for [PLANE:%d:%s] (%d vs %d)\n", plane->base.base.id, plane->base.name, cur_size, max_size); @@ -423,14 +420,14 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, break; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb big enough [PLANE:%d:%s] (%d >= %d)\n", plane->base.base.id, plane->base.name, max_size, cur_size); } if (!fb) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BIOS fb not suitable for all pipes, not using\n"); goto out; } @@ -440,7 +437,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -451,13 +448,13 @@ static bool 
intel_fbdev_init_bios(struct drm_device *dev, if (!crtc_state->uapi.active) continue; - drm_WARN(dev, !plane_state->uapi.fb, + drm_WARN(display->drm, !plane_state->uapi.fb, "re-used BIOS config but lost an fb on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); } - drm_dbg_kms(&i915->drm, "using BIOS fb for initial console\n"); + drm_dbg_kms(display->drm, "using BIOS fb for initial console\n"); return true; out: @@ -482,26 +479,25 @@ static unsigned int intel_fbdev_color_mode(const struct drm_format_info *info) } } -void intel_fbdev_setup(struct drm_i915_private *i915) +void intel_fbdev_setup(struct intel_display *display) { - struct drm_device *dev = &i915->drm; struct intel_fbdev *ifbdev; unsigned int preferred_bpp = 0; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - ifbdev = drmm_kzalloc(dev, sizeof(*ifbdev), GFP_KERNEL); + ifbdev = drmm_kzalloc(display->drm, sizeof(*ifbdev), GFP_KERNEL); if (!ifbdev) return; - i915->display.fbdev.fbdev = ifbdev; - if (intel_fbdev_init_bios(dev, ifbdev)) + display->fbdev.fbdev = ifbdev; + if (intel_fbdev_init_bios(display, ifbdev)) preferred_bpp = intel_fbdev_color_mode(ifbdev->fb->base.format); if (!preferred_bpp) preferred_bpp = 32; - drm_client_setup_with_color_mode(dev, preferred_bpp); + drm_client_setup_with_color_mode(display->drm, preferred_bpp); } struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h index 89bad3a2b01a9..a15e3e222a0c7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev.h @@ -10,7 +10,7 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; -struct drm_i915_private; +struct intel_display; struct intel_fbdev; struct intel_framebuffer; @@ -19,14 +19,14 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); #define INTEL_FBDEV_DRIVER_OPS \ .fbdev_probe 
= intel_fbdev_driver_fbdev_probe -void intel_fbdev_setup(struct drm_i915_private *dev_priv); +void intel_fbdev_setup(struct intel_display *display); struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev); struct i915_vma *intel_fbdev_vma_pointer(struct intel_fbdev *fbdev); #else #define INTEL_FBDEV_DRIVER_OPS \ .fbdev_probe = NULL -static inline void intel_fbdev_setup(struct drm_i915_private *dev_priv) +static inline void intel_fbdev_setup(struct intel_display *display) { } static inline struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 4991c35a2632f..5f4cb3328265f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -15,9 +15,9 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { + struct intel_display *display = to_intel_display(helper->dev); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_framebuffer *fb; - struct drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size; @@ -50,14 +50,14 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, * * Also skip stolen on MTL as Wa_22018444074 mitigation. 
*/ - if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) + if (!display->platform.meteorlake && size * 2 < dev_priv->dsm.usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); } if (IS_ERR(obj)) { - drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + drm_err(display->drm, "failed to allocate framebuffer (%pe)\n", obj); return ERR_PTR(-ENOMEM); } @@ -67,9 +67,10 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, return to_intel_framebuffer(fb); } -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { + struct drm_i915_private *i915 = to_i915(display->drm); struct drm_i915_gem_object *obj = to_intel_bo(_obj); struct i915_gem_ww_ctx ww; void __iomem *vaddr; @@ -101,7 +102,7 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { - drm_err(&i915->drm, + drm_err(display->drm, "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); ret = PTR_ERR(vaddr); continue; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index e502ae375fc03..cb79572727150 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -9,13 +9,13 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; struct drm_gem_object; -struct drm_i915_private; struct fb_info; struct i915_vma; +struct intel_display; struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, 
struct drm_gem_object *obj, struct i915_vma *vma); #endif diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 3a1e505ff1820..cc1b7a2b80f68 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -79,11 +79,11 @@ err: return ERR_CAST(fb); } -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, - struct drm_gem_object *_obj, struct i915_vma *vma) +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, + struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) -- GitLab From c4f9a886eec066155e2c4e37eb91477d8ea9fedf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:43 +0300 Subject: [PATCH 290/899] drm/i915/display: convert intel_modeset_setup.[ch] to struct intel_display Going forward, struct intel_display is the main display device data pointer. Convert intel_modeset_setup.[ch] to struct intel_display. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/21d51387a36f027313a0687d09a14586eb8f71a6.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_display_driver.c | 6 +- .../drm/i915/display/intel_modeset_setup.c | 192 +++++++++--------- .../drm/i915/display/intel_modeset_setup.h | 4 +- 3 files changed, 97 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 7faddbc6ad9a0..eb3ae05d15690 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -418,7 +418,6 @@ bool intel_display_driver_check_access(struct intel_display *display) /* part #2: call after irq install, but before gem init */ int intel_display_driver_probe_nogem(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; int ret; @@ -467,7 +466,7 @@ int intel_display_driver_probe_nogem(struct intel_display *display) intel_display_driver_disable_user_access(display); drm_modeset_lock_all(display->drm); - intel_modeset_setup_hw_state(i915, display->drm->mode_config.acquire_ctx); + intel_modeset_setup_hw_state(display, display->drm->mode_config.acquire_ctx); intel_acpi_assign_connector_fwnodes(display); drm_modeset_unlock_all(display->drm); @@ -689,12 +688,11 @@ __intel_display_driver_resume(struct intel_display *display, struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(display->drm); struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; int ret, i; - intel_modeset_setup_hw_state(i915, ctx); + intel_modeset_setup_hw_state(display, ctx); intel_vga_redisable(display); if (!state) diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 9e963bce340ff..5d5ade7fdd778 100644 --- 
a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -6,11 +6,11 @@ * state. */ -#include #include +#include +#include #include -#include "i915_drv.h" #include "i915_reg.h" #include "i9xx_wm.h" #include "intel_atomic.h" @@ -37,7 +37,7 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane; @@ -48,7 +48,7 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, if (!crtc_state->hw.active) return; - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -56,9 +56,9 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, intel_plane_disable_noatomic(crtc, plane); } - state = drm_atomic_state_alloc(&i915->drm); + state = drm_atomic_state_alloc(display->drm); if (!state) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "failed to disable [CRTC:%d:%s], out of memory", crtc->base.base.id, crtc->base.name); return; @@ -68,7 +68,7 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, to_intel_atomic_state(state)->internal = true; /* Everything's already locked, -EDEADLK can't happen. 
*/ - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, BIT(pipe) | intel_crtc_joiner_secondary_pipes(crtc_state)) { struct intel_crtc_state *temp_crtc_state = @@ -77,14 +77,14 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, ret = drm_atomic_add_affected_connectors(state, &temp_crtc->base); - drm_WARN_ON(&i915->drm, IS_ERR(temp_crtc_state) || ret); + drm_WARN_ON(display->drm, IS_ERR(temp_crtc_state) || ret); } - i915->display.funcs.display->crtc_disable(to_intel_atomic_state(state), crtc); + display->funcs.display->crtc_disable(to_intel_atomic_state(state), crtc); drm_atomic_state_put(state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n", crtc->base.base.id, crtc->base.name); @@ -118,13 +118,12 @@ static void set_encoder_for_connector(struct intel_connector *connector, static void reset_encoder_connector_state(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->base.encoder != &encoder->base) continue; @@ -143,10 +142,10 @@ static void reset_encoder_connector_state(struct intel_encoder *encoder) static void reset_crtc_encoder_state(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct intel_encoder *encoder; - for_each_encoder_on_crtc(&i915->drm, &crtc->base, encoder) { + 
for_each_encoder_on_crtc(display->drm, &crtc->base, encoder) { reset_encoder_connector_state(encoder); encoder->base.crtc = NULL; } @@ -183,13 +182,13 @@ static void intel_crtc_disable_noatomic_complete(struct intel_crtc *crtc) * Return all the pipes using a transcoder in @transcoder_mask. * For joiner configs return only the joiner primary. */ -static u8 get_transcoder_pipes(struct drm_i915_private *i915, +static u8 get_transcoder_pipes(struct intel_display *display, u8 transcoder_mask) { struct intel_crtc *temp_crtc; u8 pipes = 0; - for_each_intel_crtc(&i915->drm, temp_crtc) { + for_each_intel_crtc(display->drm, temp_crtc) { struct intel_crtc_state *temp_crtc_state = to_intel_crtc_state(temp_crtc->base.state); @@ -214,7 +213,6 @@ static void get_portsync_pipes(struct intel_crtc *crtc, u8 *master_pipe_mask, u8 *slave_pipes_mask) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_crtc *master_crtc; @@ -233,20 +231,20 @@ static void get_portsync_pipes(struct intel_crtc *crtc, else master_transcoder = crtc_state->master_transcoder; - *master_pipe_mask = get_transcoder_pipes(i915, BIT(master_transcoder)); - drm_WARN_ON(&i915->drm, !is_power_of_2(*master_pipe_mask)); + *master_pipe_mask = get_transcoder_pipes(display, BIT(master_transcoder)); + drm_WARN_ON(display->drm, !is_power_of_2(*master_pipe_mask)); master_crtc = intel_crtc_for_pipe(display, ffs(*master_pipe_mask) - 1); master_crtc_state = to_intel_crtc_state(master_crtc->base.state); - *slave_pipes_mask = get_transcoder_pipes(i915, master_crtc_state->sync_mode_slaves_mask); + *slave_pipes_mask = get_transcoder_pipes(display, master_crtc_state->sync_mode_slaves_mask); } -static u8 get_joiner_secondary_pipes(struct drm_i915_private *i915, u8 primary_pipes_mask) +static u8 get_joiner_secondary_pipes(struct intel_display *display, u8 primary_pipes_mask) { struct 
intel_crtc *primary_crtc; u8 pipes = 0; - for_each_intel_crtc_in_pipe_mask(&i915->drm, primary_crtc, primary_pipes_mask) { + for_each_intel_crtc_in_pipe_mask(display->drm, primary_crtc, primary_pipes_mask) { struct intel_crtc_state *primary_crtc_state = to_intel_crtc_state(primary_crtc->base.state); @@ -259,45 +257,45 @@ static u8 get_joiner_secondary_pipes(struct drm_i915_private *i915, u8 primary_p static void intel_crtc_disable_noatomic(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); + struct intel_crtc *temp_crtc; u8 portsync_master_mask; u8 portsync_slaves_mask; u8 joiner_secondaries_mask; - struct intel_crtc *temp_crtc; /* TODO: Add support for MST */ get_portsync_pipes(crtc, &portsync_master_mask, &portsync_slaves_mask); - joiner_secondaries_mask = get_joiner_secondary_pipes(i915, + joiner_secondaries_mask = get_joiner_secondary_pipes(display, portsync_master_mask | portsync_slaves_mask); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, portsync_master_mask & portsync_slaves_mask || portsync_master_mask & joiner_secondaries_mask || portsync_slaves_mask & joiner_secondaries_mask); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, joiner_secondaries_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, joiner_secondaries_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, portsync_slaves_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, portsync_slaves_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, portsync_master_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, portsync_master_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, 
temp_crtc, joiner_secondaries_mask | portsync_slaves_mask | portsync_master_mask) intel_crtc_disable_noatomic_complete(temp_crtc); } -static void intel_modeset_update_connector_atomic_state(struct drm_i915_private *i915) +static void intel_modeset_update_connector_atomic_state(struct intel_display *display) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { struct drm_connector_state *conn_state = connector->base.state; struct intel_encoder *encoder = @@ -319,7 +317,7 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); if (intel_crtc_is_joiner_secondary(crtc_state)) return; @@ -332,7 +330,7 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter; - if (DISPLAY_INFO(i915)->color.degamma_lut_size) { + if (DISPLAY_INFO(display)->color.degamma_lut_size) { /* assume 1:1 mapping */ drm_property_replace_blob(&crtc_state->hw.degamma_lut, crtc_state->pre_csc_lut); @@ -347,7 +345,7 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state * to gamma_lut as that is the only valid source of LUTs * in the uapi. 
*/ - drm_WARN_ON(&i915->drm, crtc_state->post_csc_lut && + drm_WARN_ON(display->drm, crtc_state->post_csc_lut && crtc_state->pre_csc_lut); drm_property_replace_blob(&crtc_state->hw.degamma_lut, @@ -366,15 +364,14 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state } static void -intel_sanitize_plane_mapping(struct drm_i915_private *i915) +intel_sanitize_plane_mapping(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_crtc *crtc; - if (DISPLAY_VER(i915) >= 4) + if (DISPLAY_VER(display) >= 4) return; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_plane *plane = to_intel_plane(crtc->base.primary); struct intel_crtc *plane_crtc; @@ -386,7 +383,7 @@ intel_sanitize_plane_mapping(struct drm_i915_private *i915) if (pipe == crtc->pipe) continue; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] attached to the wrong pipe, disabling plane\n", plane->base.base.id, plane->base.name); @@ -423,12 +420,12 @@ static bool intel_crtc_needs_link_reset(struct intel_crtc *crtc) static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct drm_connector_list_iter conn_iter; struct intel_connector *connector; struct intel_connector *found_connector = NULL; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (&encoder->base == connector->base.encoder) { found_connector = connector; @@ -466,7 +463,7 @@ static void intel_sanitize_fifo_underrun_reporting(const struct intel_crtc_state static bool intel_sanitize_crtc(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = 
to_intel_display(crtc); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); bool needs_link_reset; @@ -474,7 +471,7 @@ static bool intel_sanitize_crtc(struct intel_crtc *crtc, struct intel_plane *plane; /* Disable everything but the primary plane */ - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -515,7 +512,7 @@ static bool intel_sanitize_crtc(struct intel_crtc *crtc, return true; } -static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, +static void intel_sanitize_all_crtcs(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx) { struct intel_crtc *crtc; @@ -530,7 +527,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, for (;;) { u32 old_mask = crtcs_forced_off; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { u32 crtc_mask = drm_crtc_mask(&crtc->base); if (crtcs_forced_off & crtc_mask) @@ -543,7 +540,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, break; } - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -553,7 +550,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); /* * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram @@ -565,7 +562,7 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) * without several WARNs, but for now let's take the easy * road. 
*/ - return IS_SANDYBRIDGE(i915) && + return display->platform.sandybridge && crtc_state->hw.active && crtc_state->shared_dpll && crtc_state->port_clock == 0; @@ -574,13 +571,12 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_connector *connector; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); struct intel_crtc_state *crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL; struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); /* * We need to check both for a crtc link (meaning that the encoder is @@ -591,7 +587,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) crtc_state->hw.active; if (crtc_state && has_bogus_dpll_config(crtc_state)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BIOS has misprogrammed the hardware. 
Disabling pipe %c\n", pipe_name(crtc->pipe)); has_active_crtc = false; @@ -599,7 +595,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] has active connectors but no active pipe!\n", encoder->base.base.id, encoder->base.name); @@ -616,7 +612,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) if (crtc_state) { struct drm_encoder *best_encoder; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); @@ -650,18 +646,17 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* notify opregion of the sanitized encoder state */ intel_opregion_notify_encoder(encoder, connector && has_active_crtc); - if (HAS_DDI(i915)) + if (HAS_DDI(display)) intel_ddi_sanitize_encoder_pll_mapping(encoder); } /* FIXME read out full plane state for all planes */ -static void readout_plane_state(struct drm_i915_private *i915) +static void readout_plane_state(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_plane *plane; struct intel_crtc *crtc; - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); struct intel_crtc_state *crtc_state; @@ -675,13 +670,13 @@ static void readout_plane_state(struct drm_i915_private *i915) intel_set_plane_visible(crtc_state, plane_state, visible); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] hw state readout: %s, pipe %c\n", plane->base.base.id, plane->base.name, str_enabled_disabled(visible), pipe_name(pipe)); } - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -689,18 +684,17 
@@ static void readout_plane_state(struct drm_i915_private *i915) } } -static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) +static void intel_modeset_readout_hw_state(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); enum pipe pipe; struct intel_crtc *crtc; struct intel_encoder *encoder; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -715,15 +709,15 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) crtc->base.enabled = crtc_state->hw.enable; crtc->active = crtc_state->hw.active; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, str_enabled_disabled(crtc_state->hw.active)); } - readout_plane_state(i915); + readout_plane_state(display); - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_crtc_state *crtc_state = NULL; pipe = 0; @@ -742,7 +736,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) /* encoder should read be linked to joiner primary */ WARN_ON(intel_crtc_is_joiner_secondary(crtc_state)); - for_each_intel_crtc_in_pipe_mask(&i915->drm, secondary_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, secondary_crtc, intel_crtc_joiner_secondary_pipes(crtc_state)) { struct intel_crtc_state *secondary_crtc_state; @@ -765,7 +759,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) if (encoder->sync_state) encoder->sync_state(encoder, crtc_state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] hw state readout: %s, pipe 
%c\n", encoder->base.base.id, encoder->base.name, str_enabled_disabled(encoder->base.crtc), @@ -774,7 +768,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) intel_dpll_readout_hw_state(display); - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { struct intel_crtc_state *crtc_state = NULL; @@ -808,14 +802,14 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) if (connector->sync_state) connector->sync_state(connector, crtc_state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] hw state readout: %s\n", connector->base.base.id, connector->base.name, str_enabled_disabled(connector->base.encoder)); } drm_connector_list_iter_end(&conn_iter); - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane; @@ -838,7 +832,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) intel_crtc_copy_hw_to_uapi_state(crtc_state); } - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -854,14 +848,14 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) * use plane->min_cdclk() :( */ if (plane_state->uapi.visible && plane->min_cdclk) { - if (crtc_state->double_wide || DISPLAY_VER(i915) >= 10) + if (crtc_state->double_wide || DISPLAY_VER(display) >= 10) crtc_state->min_cdclk[plane->id] = DIV_ROUND_UP(crtc_state->pixel_rate, 2); else crtc_state->min_cdclk[plane->id] = crtc_state->pixel_rate; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] min_cdclk %d kHz\n", plane->base.base.id, plane->base.name, crtc_state->min_cdclk[plane->id]); @@ -882,11 
+876,11 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) } static void -get_encoder_power_domains(struct drm_i915_private *i915) +get_encoder_power_domains(struct intel_display *display) { struct intel_encoder *encoder; - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_crtc_state *crtc_state; if (!encoder->get_power_domains) @@ -904,47 +898,47 @@ get_encoder_power_domains(struct drm_i915_private *i915) } } -static void intel_early_display_was(struct drm_i915_private *i915) +static void intel_early_display_was(struct intel_display *display) { /* * Display WA #1185 WaDisableDARBFClkGating:glk,icl,ehl,tgl * Also known as Wa_14010480278. */ - if (IS_DISPLAY_VER(i915, 10, 12)) - intel_de_rmw(i915, GEN9_CLKGATE_DIS_0, 0, DARBF_GATING_DIS); + if (IS_DISPLAY_VER(display, 10, 12)) + intel_de_rmw(display, GEN9_CLKGATE_DIS_0, 0, DARBF_GATING_DIS); /* * WaRsPkgCStateDisplayPMReq:hsw * System hang if this isn't done before disabling all planes! 
*/ - if (IS_HASWELL(i915)) - intel_de_rmw(i915, CHICKEN_PAR1_1, 0, FORCE_ARB_IDLE_PLANES); + if (display->platform.haswell) + intel_de_rmw(display, CHICKEN_PAR1_1, 0, FORCE_ARB_IDLE_PLANES); - if (IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) { + if (display->platform.kabylake || display->platform.coffeelake || + display->platform.cometlake) { /* Display WA #1142:kbl,cfl,cml */ - intel_de_rmw(i915, CHICKEN_PAR1_1, + intel_de_rmw(display, CHICKEN_PAR1_1, KBL_ARB_FILL_SPARE_22, KBL_ARB_FILL_SPARE_22); - intel_de_rmw(i915, CHICKEN_MISC_2, + intel_de_rmw(display, CHICKEN_MISC_2, KBL_ARB_FILL_SPARE_13 | KBL_ARB_FILL_SPARE_14, KBL_ARB_FILL_SPARE_14); } } -void intel_modeset_setup_hw_state(struct drm_i915_private *i915, +void intel_modeset_setup_hw_state(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx) { - struct intel_display *display = &i915->display; struct intel_encoder *encoder; struct intel_crtc *crtc; intel_wakeref_t wakeref; wakeref = intel_display_power_get(display, POWER_DOMAIN_INIT); - intel_early_display_was(i915); - intel_modeset_readout_hw_state(i915); + intel_early_display_was(display); + intel_modeset_readout_hw_state(display); /* HW state is read out, now we need to sanitize this mess. */ - get_encoder_power_domains(i915); + get_encoder_power_domains(display); intel_pch_sanitize(display); @@ -954,7 +948,7 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, * intel_sanitize_plane_mapping() may need to do vblank * waits, so we need vblank interrupts restored beforehand. 
*/ - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -968,20 +962,20 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, } } - intel_fbc_sanitize(&i915->display); + intel_fbc_sanitize(display); - intel_sanitize_plane_mapping(i915); + intel_sanitize_plane_mapping(display); - for_each_intel_encoder(&i915->drm, encoder) + for_each_intel_encoder(display->drm, encoder) intel_sanitize_encoder(encoder); /* * Sanitizing CRTCs needs their connector atomic state to be * up-to-date, so ensure that already here. */ - intel_modeset_update_connector_atomic_state(i915); + intel_modeset_update_connector_atomic_state(display); - intel_sanitize_all_crtcs(i915, ctx); + intel_sanitize_all_crtcs(display, ctx); intel_dpll_sanitize_state(display); @@ -990,13 +984,13 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, intel_wm_get_hw_state(display); intel_wm_sanitize(display); - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_power_domain_mask put_domains; intel_modeset_get_crtc_power_domains(crtc_state, &put_domains); - if (drm_WARN_ON(&i915->drm, !bitmap_empty(put_domains.bits, POWER_DOMAIN_NUM))) + if (drm_WARN_ON(display->drm, !bitmap_empty(put_domains.bits, POWER_DOMAIN_NUM))) intel_modeset_put_crtc_power_domains(crtc, &put_domains); } diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.h b/drivers/gpu/drm/i915/display/intel_modeset_setup.h index 3beff67b33d03..f5e6f3ae9572d 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.h +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.h @@ -6,10 +6,10 @@ #ifndef __INTEL_MODESET_SETUP_H__ #define __INTEL_MODESET_SETUP_H__ -struct drm_i915_private; struct drm_modeset_acquire_ctx; +struct intel_display; -void intel_modeset_setup_hw_state(struct 
drm_i915_private *i915, +void intel_modeset_setup_hw_state(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx); #endif /* __INTEL_MODESET_SETUP_H__ */ -- GitLab From 46b5871865ce02cc10977a2f1c575269f9a13770 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:44 +0300 Subject: [PATCH 291/899] drm/i915/display: convert intel_modeset_verify.c to struct intel_display Going forward, struct intel_display is the main display device data pointer. Convert intel_modeset_verify.[ch] to struct intel_display. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/b01a3ef3dbb2ffdaa6b5e9ebec14f91efcca3049.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_modeset_verify.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_modeset_verify.c b/drivers/gpu/drm/i915/display/intel_modeset_verify.c index a008412fdd04d..766a9983665a8 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_verify.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_verify.c @@ -6,13 +6,14 @@ */ #include +#include -#include "i915_drv.h" #include "intel_atomic.h" #include "intel_crtc.h" #include "intel_crtc_state_dump.h" #include "intel_cx0_phy.h" #include "intel_display.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_fdi.h" #include "intel_modeset_verify.h" @@ -28,9 +29,8 @@ static void intel_connector_verify_state(const struct intel_crtc_state *crtc_sta { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.base.id, connector->base.name); if (connector->get_hw_state(connector)) { @@ -91,7 +91,6 @@ verify_connector_state(struct intel_atomic_state 
*state, static void intel_pipe_config_sanity_check(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); if (crtc_state->has_pch_encoder) { int fdi_dotclock = intel_dotclock_calculate(intel_fdi_link_freq(display, crtc_state), @@ -103,7 +102,7 @@ static void intel_pipe_config_sanity_check(const struct intel_crtc_state *crtc_s * Yell if the encoder disagrees. Allow for slight * rounding differences. */ - drm_WARN(&i915->drm, abs(fdi_dotclock - dotclock) > 1, + drm_WARN(display->drm, abs(fdi_dotclock - dotclock) > 1, "FDI dotclock and encoder dotclock mismatch, fdi: %i, encoder: %i\n", fdi_dotclock, dotclock); } @@ -113,17 +112,16 @@ static void verify_encoder_state(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_encoder *encoder; struct drm_connector *connector; const struct drm_connector_state *old_conn_state, *new_conn_state; int i; - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { bool enabled = false, found = false; enum pipe pipe; - drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[ENCODER:%d:%s]\n", encoder->base.base.id, encoder->base.name); @@ -166,7 +164,6 @@ verify_crtc_state(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_crtc_state *sw_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_state *hw_crtc_state; @@ -185,7 +182,7 @@ verify_crtc_state(struct intel_atomic_state *state, intel_crtc_get_pipe_config(hw_crtc_state); /* we keep both pipes enabled on 830 */ - if (IS_I830(i915) && hw_crtc_state->hw.active) + if (display->platform.i830 && hw_crtc_state->hw.active) 
hw_crtc_state->hw.active = sw_crtc_state->hw.active; INTEL_DISPLAY_STATE_WARN(display, -- GitLab From c370285b81d8a96451b3443bd973d2c27903c242 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:45 +0300 Subject: [PATCH 292/899] drm/i915/sprite: convert intel_sprite_uapi.c to struct intel_display Going forward, struct intel_display is the main display device data pointer. Convert intel_sprite_uapi.c to struct intel_display. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/d4f71c2976a1a28b4e74c2fc1097090fe7f78743.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_sprite_uapi.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite_uapi.c b/drivers/gpu/drm/i915/display/intel_sprite_uapi.c index 1d0b84b464c14..4981cc34da05f 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite_uapi.c +++ b/drivers/gpu/drm/i915/display/intel_sprite_uapi.c @@ -3,21 +3,21 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" #include "intel_crtc.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_sprite_uapi.h" -static bool has_dst_key_in_primary_plane(struct drm_i915_private *dev_priv) +static bool has_dst_key_in_primary_plane(struct intel_display *display) { - return DISPLAY_VER(dev_priv) >= 9; + return DISPLAY_VER(display) >= 9; } static void intel_plane_set_ckey(struct intel_plane_state *plane_state, const struct drm_intel_sprite_colorkey *set) { + struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_intel_sprite_colorkey *key = &plane_state->ckey; *key = *set; @@ -34,7 +34,7 @@ static void intel_plane_set_ckey(struct intel_plane_state *plane_state, * On SKL+ we want dst key enabled on * the primary and not on the sprite. 
*/ - if (DISPLAY_VER(dev_priv) >= 9 && plane->id != PLANE_PRIMARY && + if (DISPLAY_VER(display) >= 9 && plane->id != PLANE_PRIMARY && set->flags & I915_SET_COLORKEY_DESTINATION) key->flags = 0; } @@ -43,7 +43,6 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct intel_display *display = to_intel_display(dev); - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_intel_sprite_colorkey *set = data; struct drm_plane *plane; struct drm_plane_state *plane_state; @@ -61,7 +60,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, if ((set->flags & (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) == (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) return -EINVAL; - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + if ((display->platform.valleyview || display->platform.cherryview) && set->flags & I915_SET_COLORKEY_DESTINATION) return -EINVAL; @@ -74,7 +73,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, * Also multiple planes can't do destination keying on the same * pipe simultaneously. */ - if (DISPLAY_VER(dev_priv) >= 9 && + if (DISPLAY_VER(display) >= 9 && to_intel_plane(plane)->id >= PLANE_3 && set->flags & I915_SET_COLORKEY_DESTINATION) return -EINVAL; @@ -99,7 +98,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, * On some platforms we have to configure * the dst colorkey on the primary plane. 
*/ - if (!ret && has_dst_key_in_primary_plane(dev_priv)) { + if (!ret && has_dst_key_in_primary_plane(display)) { struct intel_crtc *crtc = intel_crtc_for_pipe(display, to_intel_plane(plane)->pipe); -- GitLab From d358cee74715ec0cbd616cf425d450623950b159 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:46 +0300 Subject: [PATCH 293/899] drm/i915/frontbuffer: convert intel_frontbuffer.[ch] to struct intel_display Going forward, struct intel_display is the main display device data pointer. Convert intel_frontbuffer.[ch] to struct intel_display. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/ef0860583b7d6ad141959f84c25657e0c102d6d2.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 7 +-- .../gpu/drm/i915/display/intel_frontbuffer.c | 60 +++++++++---------- .../gpu/drm/i915/display/intel_frontbuffer.h | 8 +-- drivers/gpu/drm/i915/display/intel_overlay.c | 6 +- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index db524d01e574d..33c09999c42e0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1043,14 +1043,13 @@ static void intel_post_plane_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; - intel_frontbuffer_flip(dev_priv, new_crtc_state->fb_bits); + intel_frontbuffer_flip(display, new_crtc_state->fb_bits); if (new_crtc_state->update_wm_post && new_crtc_state->hw.active) intel_update_watermarks(display); @@ -1281,7 +1280,7 @@ static void 
intel_pre_plane_update(struct intel_atomic_state *state, static void intel_crtc_disable_planes(struct intel_atomic_state *state, struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); unsigned int update_mask = new_crtc_state->update_planes; @@ -1303,7 +1302,7 @@ static void intel_crtc_disable_planes(struct intel_atomic_state *state, fb_bits |= plane->frontbuffer_bit; } - intel_frontbuffer_flip(dev_priv, fb_bits); + intel_frontbuffer_flip(display, fb_bits); } static void intel_encoders_update_prepare(struct intel_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index ba2f88ca61173..43be5377ddc1a 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -58,7 +58,6 @@ #include #include "i915_active.h" -#include "i915_drv.h" #include "i915_vma.h" #include "intel_bo.h" #include "intel_display_trace.h" @@ -72,7 +71,7 @@ /** * frontbuffer_flush - flush frontbuffer - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -82,16 +81,14 @@ * * Can be called without any locks held. 
*/ -static void frontbuffer_flush(struct drm_i915_private *i915, +static void frontbuffer_flush(struct intel_display *display, unsigned int frontbuffer_bits, enum fb_op_origin origin) { - struct intel_display *display = &i915->display; - /* Delay flushing when rings are still busy.*/ - spin_lock(&i915->display.fb_tracking.lock); - frontbuffer_bits &= ~i915->display.fb_tracking.busy_bits; - spin_unlock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); + frontbuffer_bits &= ~display->fb_tracking.busy_bits; + spin_unlock(&display->fb_tracking.lock); if (!frontbuffer_bits) return; @@ -107,7 +104,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915, /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. The actual @@ -117,19 +114,19 @@ static void frontbuffer_flush(struct drm_i915_private *i915, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, +void intel_frontbuffer_flip_prepare(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); - i915->display.fb_tracking.flip_bits |= frontbuffer_bits; + spin_lock(&display->fb_tracking.lock); + display->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. 
*/ - i915->display.fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + display->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -137,22 +134,22 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, +void intel_frontbuffer_flip_complete(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); /* Mask any cancelled flips. */ - frontbuffer_bits &= i915->display.fb_tracking.flip_bits; - i915->display.fb_tracking.flip_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + frontbuffer_bits &= display->fb_tracking.flip_bits; + display->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); if (frontbuffer_bits) - frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); + frontbuffer_flush(display, frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. This is for @@ -161,15 +158,15 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, * * Can be called without any locks held. 
*/ -void intel_frontbuffer_flip(struct drm_i915_private *i915, +void intel_frontbuffer_flip(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ - i915->display.fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + display->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); - frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); + frontbuffer_flush(display, frontbuffer_bits, ORIGIN_FLIP); } void __intel_fb_invalidate(struct intel_frontbuffer *front, @@ -198,7 +195,6 @@ void __intel_fb_flush(struct intel_frontbuffer *front, unsigned int frontbuffer_bits) { struct intel_display *display = to_intel_display(front->obj->dev); - struct drm_i915_private *i915 = to_i915(display->drm); if (origin == ORIGIN_CS) { spin_lock(&display->fb_tracking.lock); @@ -209,7 +205,7 @@ void __intel_fb_flush(struct intel_frontbuffer *front, } if (frontbuffer_bits) - frontbuffer_flush(i915, frontbuffer_bits, origin); + frontbuffer_flush(display, frontbuffer_bits, origin); } static void intel_frontbuffer_flush_work(struct work_struct *work) @@ -280,7 +276,7 @@ static void frontbuffer_release(struct kref *ref) struct intel_frontbuffer * intel_frontbuffer_get(struct drm_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->dev); + struct intel_display *display = to_intel_display(obj->dev); struct intel_frontbuffer *front, *cur; front = intel_bo_get_frontbuffer(obj); @@ -300,9 +296,9 @@ intel_frontbuffer_get(struct drm_gem_object *obj) I915_ACTIVE_RETIRE_SLEEPS); INIT_WORK(&front->flush_work, intel_frontbuffer_flush_work); - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); - spin_unlock(&i915->display.fb_tracking.lock); + spin_unlock(&display->fb_tracking.lock); if (cur != front) 
kfree(front); return cur; diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h index 6237780a9f683..2fee12eaf9b6c 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h @@ -31,7 +31,7 @@ #include "i915_active_types.h" struct drm_gem_object; -struct drm_i915_private; +struct intel_display; enum fb_op_origin { ORIGIN_CPU = 0, @@ -68,11 +68,11 @@ struct intel_frontbuffer { GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \ INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) -void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, +void intel_frontbuffer_flip_prepare(struct intel_display *display, unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, +void intel_frontbuffer_flip_complete(struct intel_display *display, unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_i915_private *i915, +void intel_frontbuffer_flip(struct intel_display *display, unsigned frontbuffer_bits); void intel_frontbuffer_put(struct intel_frontbuffer *front); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index aff9a3455c1b7..12308495afa51 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -291,7 +291,6 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, struct i915_vma *vma) { struct intel_display *display = overlay->display; - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe = overlay->crtc->pipe; struct intel_frontbuffer *frontbuffer = NULL; @@ -307,7 +306,7 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, intel_frontbuffer_put(overlay->frontbuffer); overlay->frontbuffer = frontbuffer; - intel_frontbuffer_flip_prepare(i915, INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip_prepare(display, 
INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; if (vma) @@ -359,14 +358,13 @@ static int intel_overlay_continue(struct intel_overlay *overlay, static void intel_overlay_release_old_vma(struct intel_overlay *overlay) { struct intel_display *display = overlay->display; - struct drm_i915_private *i915 = to_i915(display->drm); struct i915_vma *vma; vma = fetch_and_zero(&overlay->old_vma); if (drm_WARN_ON(display->drm, !vma)) return; - intel_frontbuffer_flip_complete(i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + intel_frontbuffer_flip_complete(display, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); i915_vma_unpin(vma); i915_vma_put(vma); -- GitLab From 56bcacebadb6f7a52ee1f890142c3a096f6d6073 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:47 +0300 Subject: [PATCH 294/899] drm/i915/crt: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/83980c1ae53157ef5d65d7ce99b294889622faa8.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_crt.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index cca22d2402e88..8908b51dc3b93 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -370,7 +370,7 @@ intel_crt_mode_valid(struct drm_connector *connector, if (HAS_PCH_LPT(dev_priv)) max_clock = 180000; - else if (IS_VALLEYVIEW(dev_priv)) + else if (display->platform.valleyview) /* * 270 MHz due to current DPLL limits, * DAC limit supposedly 355 MHz. 
@@ -591,7 +591,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) if (HAS_PCH_SPLIT(dev_priv)) return ilk_crt_detect_hotplug(connector); - if (IS_VALLEYVIEW(dev_priv)) + if (display->platform.valleyview) return valleyview_crt_detect_hotplug(connector); /* @@ -599,7 +599,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) * to get a reliable result. */ - if (IS_G45(dev_priv)) + if (display->platform.g45) tries = 2; else tries = 1; @@ -940,7 +940,6 @@ out: static int intel_crt_get_modes(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *encoder = &crt->base; intel_wakeref_t wakeref; @@ -953,7 +952,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) wakeref = intel_display_power_get(display, encoder->power_domain); ret = intel_crt_ddc_get_modes(connector, connector->ddc); - if (ret || !IS_G4X(dev_priv)) + if (ret || !display->platform.g4x) goto out; /* Try to probe digital port for output in DVI-I -> VGA mode. 
*/ @@ -1021,7 +1020,7 @@ void intel_crt_init(struct intel_display *display) if (HAS_PCH_SPLIT(dev_priv)) adpa_reg = PCH_ADPA; - else if (IS_VALLEYVIEW(dev_priv)) + else if (display->platform.valleyview) adpa_reg = VLV_ADPA; else adpa_reg = ADPA; @@ -1069,7 +1068,7 @@ void intel_crt_init(struct intel_display *display) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = BIT(INTEL_OUTPUT_DVO) | BIT(INTEL_OUTPUT_HDMI); - if (IS_I830(dev_priv)) + if (display->platform.i830) crt->base.pipe_mask = BIT(PIPE_A); else crt->base.pipe_mask = ~0; -- GitLab From d42652314cb232a39627089c790d141c11b1c3a2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:48 +0300 Subject: [PATCH 295/899] drm/i915/dmc: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/eda2b6cd285ec76d57d91ea3fe33158852aaec22.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 45 ++++++++++-------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 98f80a6c63e80..ff2b97a752b11 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -173,7 +173,6 @@ MODULE_FIRMWARE(BXT_DMC_PATH); static const char *dmc_firmware_default(struct intel_display *display, u32 *size) { - struct drm_i915_private *i915 = to_i915(display->drm); const char *fw_path = NULL; u32 max_fw_size = 0; @@ -189,39 +188,39 @@ static const char *dmc_firmware_default(struct intel_display *display, u32 *size } else if (DISPLAY_VERx100(display) == 1400) { fw_path = MTL_DMC_PATH; max_fw_size = XELPDP_DMC_MAX_FW_SIZE; - } else if (IS_DG2(i915)) { + } else if (display->platform.dg2) { fw_path = DG2_DMC_PATH; max_fw_size = DISPLAY_VER13_DMC_MAX_FW_SIZE; - } else if 
(IS_ALDERLAKE_P(i915)) { + } else if (display->platform.alderlake_p) { fw_path = ADLP_DMC_PATH; max_fw_size = DISPLAY_VER13_DMC_MAX_FW_SIZE; - } else if (IS_ALDERLAKE_S(i915)) { + } else if (display->platform.alderlake_s) { fw_path = ADLS_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_DG1(i915)) { + } else if (display->platform.dg1) { fw_path = DG1_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_ROCKETLAKE(i915)) { + } else if (display->platform.rocketlake) { fw_path = RKL_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_TIGERLAKE(i915)) { + } else if (display->platform.tigerlake) { fw_path = TGL_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; } else if (DISPLAY_VER(display) == 11) { fw_path = ICL_DMC_PATH; max_fw_size = ICL_DMC_MAX_FW_SIZE; - } else if (IS_GEMINILAKE(i915)) { + } else if (display->platform.geminilake) { fw_path = GLK_DMC_PATH; max_fw_size = GLK_DMC_MAX_FW_SIZE; - } else if (IS_KABYLAKE(i915) || - IS_COFFEELAKE(i915) || - IS_COMETLAKE(i915)) { + } else if (display->platform.kabylake || + display->platform.coffeelake || + display->platform.cometlake) { fw_path = KBL_DMC_PATH; max_fw_size = KBL_DMC_MAX_FW_SIZE; - } else if (IS_SKYLAKE(i915)) { + } else if (display->platform.skylake) { fw_path = SKL_DMC_PATH; max_fw_size = SKL_DMC_MAX_FW_SIZE; - } else if (IS_BROXTON(i915)) { + } else if (display->platform.broxton) { fw_path = BXT_DMC_PATH; max_fw_size = BXT_DMC_MAX_FW_SIZE; } @@ -541,8 +540,6 @@ static bool disable_dmc_evt(struct intel_display *display, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (!is_dmc_evt_ctl_reg(display, dmc_id, reg)) return false; @@ -551,12 +548,12 @@ static bool disable_dmc_evt(struct intel_display *display, return true; /* also disable the flip queue event on the main DMC on TGL */ - if (IS_TIGERLAKE(i915) && + if (display->platform.tigerlake && 
REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC) return true; /* also disable the HRR event on the main DMC on TGL/ADLS */ - if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) && + if ((display->platform.tigerlake || display->platform.alderlake_s) && REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A) return true; @@ -588,7 +585,6 @@ static u32 dmc_mmiodata(struct intel_display *display, */ void intel_dmc_load_program(struct intel_display *display) { - struct drm_i915_private *i915 __maybe_unused = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct intel_dmc *dmc = display_to_dmc(display); enum intel_dmc_id dmc_id; @@ -1012,9 +1008,7 @@ static void intel_dmc_runtime_pm_put(struct intel_display *display) static const char *dmc_fallback_path(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (IS_ALDERLAKE_P(i915)) + if (display->platform.alderlake_p) return ADLP_DMC_FALLBACK_PATH; return NULL; @@ -1279,7 +1273,6 @@ static bool intel_dmc_get_dc6_allowed_count(struct intel_display *display, u32 * static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) { struct intel_display *display = m->private; - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_dmc *dmc = display_to_dmc(display); struct ref_tracker *wakeref; i915_reg_t dc5_reg, dc6_reg = INVALID_MMIO_REG; @@ -1299,7 +1292,7 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "Pipe A fw loaded: %s\n", str_yes_no(has_dmc_id_fw(display, DMC_FW_PIPEA))); seq_printf(m, "Pipe B fw needed: %s\n", - str_yes_no(IS_ALDERLAKE_P(i915) || + str_yes_no(display->platform.alderlake_p || DISPLAY_VER(display) >= 14)); seq_printf(m, "Pipe B fw loaded: %s\n", str_yes_no(has_dmc_id_fw(display, DMC_FW_PIPEB))); @@ -1313,7 +1306,7 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) if 
(DISPLAY_VER(display) >= 12) { i915_reg_t dc3co_reg; - if (IS_DGFX(i915) || DISPLAY_VER(display) >= 14) { + if (display->platform.dgfx || DISPLAY_VER(display) >= 14) { dc3co_reg = DG1_DMC_DEBUG3; dc5_reg = DG1_DMC_DEBUG_DC5_COUNT; } else { @@ -1325,9 +1318,9 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "DC3CO count: %d\n", intel_de_read(display, dc3co_reg)); } else { - dc5_reg = IS_BROXTON(i915) ? BXT_DMC_DC3_DC5_COUNT : + dc5_reg = display->platform.broxton ? BXT_DMC_DC3_DC5_COUNT : SKL_DMC_DC3_DC5_COUNT; - if (!IS_GEMINILAKE(i915) && !IS_BROXTON(i915)) + if (!display->platform.geminilake && !display->platform.broxton) dc6_reg = SKL_DMC_DC5_DC6_COUNT; } -- GitLab From af6fe09131afe6e3f56ef547c2537206f01a1147 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:49 +0300 Subject: [PATCH 296/899] drm/i915/dp-aux: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/a5cde717001eb2843344beb21ca8907ab2e43d4f.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_aux.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 0496061203fbf..3245a15935dba 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -177,12 +177,11 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, int send_bytes, u32 aux_clock_divider) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_display *display = to_intel_display(intel_dp); u32 timeout; /* Max timeout value on G4x-BDW: 1.6ms */ - if (IS_BROADWELL(i915)) + if (display->platform.broadwell) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -804,7 +803,7 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) } else if (HAS_PCH_SPLIT(i915)) { intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; intel_dp->aux_ch_data_reg = ilk_aux_data_reg; - } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + } else if (display->platform.valleyview || display->platform.cherryview) { intel_dp->aux_ch_ctl_reg = vlv_aux_ctl_reg; intel_dp->aux_ch_data_reg = vlv_aux_data_reg; } else { @@ -814,7 +813,7 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) if (DISPLAY_VER(display) >= 9) intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; - else if (IS_BROADWELL(i915) || IS_HASWELL(i915)) + else if (display->platform.broadwell || display->platform.haswell) intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; else if (HAS_PCH_SPLIT(i915)) intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; -- GitLab From 5739a143db0631f33fc44dc3d2617591da514898 Mon Sep 17 00:00:00 2001 From: 
Jani Nikula Date: Wed, 9 Apr 2025 21:17:50 +0300 Subject: [PATCH 297/899] drm/i915/dpio: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/36166cd0cfdb88df4c0322c4edea69fad5ad7177.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dpio_phy.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 1e1af71507235..69f2421394205 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -222,9 +222,7 @@ static const struct bxt_dpio_phy_info glk_dpio_phy_info[] = { static const struct bxt_dpio_phy_info * bxt_get_phy_list(struct intel_display *display, int *count) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (IS_GEMINILAKE(dev_priv)) { + if (display->platform.geminilake) { *count = ARRAY_SIZE(glk_dpio_phy_info); return glk_dpio_phy_info; } else { -- GitLab From 1058ee1b20da9a818099b5e35122de8c50f05ce0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:51 +0300 Subject: [PATCH 298/899] drm/i915/gmbus: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/7a69d2ffa15306da899b98e0d6af09b4df1b7ec3.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_gmbus.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index abf457e68ee99..2a530bf35ebbf 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -171,13 +171,13 @@ static const struct gmbus_pin *get_gmbus_pin(struct intel_display *display, } else if (HAS_PCH_CNP(i915)) { pins = gmbus_pins_cnp; size = ARRAY_SIZE(gmbus_pins_cnp); - } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { + } else if (display->platform.geminilake || display->platform.broxton) { pins = gmbus_pins_bxt; size = ARRAY_SIZE(gmbus_pins_bxt); } else if (DISPLAY_VER(display) == 9) { pins = gmbus_pins_skl; size = ARRAY_SIZE(gmbus_pins_skl); - } else if (IS_BROADWELL(i915)) { + } else if (display->platform.broadwell) { pins = gmbus_pins_bdw; size = ARRAY_SIZE(gmbus_pins_bdw); } else { @@ -240,11 +240,10 @@ static void bxt_gmbus_clock_gating(struct intel_display *display, static u32 get_reserved(struct intel_gmbus *bus) { struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); u32 reserved = 0; /* On most chips, these bits must be preserved in software. 
*/ - if (!IS_I830(i915) && !IS_I845G(i915)) + if (!display->platform.i830 && !display->platform.i845g) reserved = intel_de_read_notrace(display, bus->gpio_reg) & (GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE); @@ -314,11 +313,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) { struct intel_gmbus *bus = to_intel_gmbus(adapter); struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); intel_gmbus_reset(display); - if (IS_PINEVIEW(i915)) + if (display->platform.pineview) pnv_gmbus_clock_gating(display, false); set_data(bus, 1); @@ -332,12 +330,11 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) { struct intel_gmbus *bus = to_intel_gmbus(adapter); struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); set_data(bus, 1); set_clock(bus, 1); - if (IS_PINEVIEW(i915)) + if (display->platform.pineview) pnv_gmbus_clock_gating(display, true); } @@ -635,7 +632,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, int ret = 0; /* Display WA #0868: skl,bxt,kbl,cfl,glk */ - if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) + if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, false); else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) pch_gmbus_clock_gating(display, false); @@ -748,7 +745,7 @@ timeout: out: /* Display WA #0868: skl,bxt,kbl,cfl,glk */ - if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) + if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, true); else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) pch_gmbus_clock_gating(display, true); @@ -873,12 +870,11 @@ static const struct i2c_lock_operations gmbus_lock_ops = { */ int intel_gmbus_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct pci_dev *pdev = to_pci_dev(display->drm->dev); unsigned int pin; int ret; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if 
(display->platform.valleyview || display->platform.cherryview) display->gmbus.mmio_base = VLV_DISPLAY_BASE; else if (!HAS_GMCH(display)) /* @@ -925,7 +921,7 @@ int intel_gmbus_setup(struct intel_display *display) bus->reg0 = pin | GMBUS_RATE_100KHZ; /* gmbus seems to be broken on i830 */ - if (IS_I830(i915)) + if (display->platform.i830) bus->force_bit = 1; intel_gpio_setup(bus, GPIO(display, gmbus_pin->gpio)); -- GitLab From 4a3506d174cf2bf506985e0194873e4e5623ad72 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:52 +0300 Subject: [PATCH 299/899] drm/i915/hdmi: switch to display->platform based platform detection Prefer display->platform based platform detection over the old IS_*() macros. Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/02659f1144180f328167734f7e31499833749c8d.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hdmi.c | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f9fa17e1f584b..120c63dfdd02e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -769,7 +769,7 @@ intel_hdmi_compute_spd_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(crtc_state); struct hdmi_spd_infoframe *frame = &crtc_state->infoframes.spd.spd; int ret; @@ -779,7 +779,7 @@ intel_hdmi_compute_spd_infoframe(struct intel_encoder *encoder, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_SPD); - if (IS_DGFX(i915)) + if (display->platform.dgfx) ret = hdmi_spd_infoframe_init(frame, "Intel", "Discrete gfx"); else ret = hdmi_spd_infoframe_init(frame, "Intel", "Integrated gfx"); @@ -989,7 +989,7 @@ 
static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, if (HAS_DDI(display)) reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); else if (HAS_PCH_SPLIT(dev_priv)) reg = TVIDEO_DIP_GCP(crtc->pipe); @@ -1015,7 +1015,7 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, if (HAS_DDI(display)) reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); else if (HAS_PCH_SPLIT(dev_priv)) reg = TVIDEO_DIP_GCP(crtc->pipe); @@ -1029,9 +1029,9 @@ static void intel_hdmi_compute_gcp_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); - if (IS_G4X(dev_priv) || !crtc_state->has_infoframe) + if (display->platform.g4x || !crtc_state->has_infoframe) return; crtc_state->infoframes.enable |= @@ -1539,7 +1539,6 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, struct intel_display *display = to_intel_display(dig_port); struct intel_hdmi *hdmi = &dig_port->hdmi; struct intel_connector *connector = hdmi->attached_connector; - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); int ret; if (!enable) @@ -1558,7 +1557,7 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, * WA: To fix incorrect positioning of the window of * opportunity and enc_en signalling in KABYLAKE. 
*/ - if (IS_KABYLAKE(dev_priv) && enable) + if (display->platform.kabylake && enable) return kbl_repositioning_enc_en_signal(connector, cpu_transcoder); @@ -1814,14 +1813,13 @@ static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = { static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int max_tmds_clock, vbt_max_tmds_clock; - if (DISPLAY_VER(display) >= 13 || IS_ALDERLAKE_S(dev_priv)) + if (DISPLAY_VER(display) >= 13 || display->platform.alderlake_s) max_tmds_clock = 600000; else if (DISPLAY_VER(display) >= 10) max_tmds_clock = 594000; - else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(dev_priv)) + else if (DISPLAY_VER(display) >= 8 || display->platform.haswell) max_tmds_clock = 300000; else if (DISPLAY_VER(display) >= 5) max_tmds_clock = 225000; @@ -1880,7 +1878,6 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct intel_display *display = to_intel_display(hdmi); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; if (clock < 25000) @@ -1890,16 +1887,16 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, return MODE_CLOCK_HIGH; /* GLK DPLL can't generate 446-480 MHz */ - if (IS_GEMINILAKE(dev_priv) && clock > 446666 && clock < 480000) + if (display->platform.geminilake && clock > 446666 && clock < 480000) return MODE_CLOCK_RANGE; /* BXT/GLK DPLL can't generate 223-240 MHz */ - if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + if ((display->platform.geminilake || display->platform.broxton) && clock > 223333 && clock < 240000) return MODE_CLOCK_RANGE; /* CHV DPLL can't generate 216-240 MHz */ - if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) + if (display->platform.cherryview && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ @@ 
-2440,7 +2437,6 @@ static void intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; struct i2c_adapter *ddc = connector->ddc; @@ -2481,7 +2477,7 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) hdmi->dp_dual_mode.max_tmds_clock); /* Older VBTs are often buggy and can't be trusted :( Play it safe. */ - if ((DISPLAY_VER(display) >= 8 || IS_HASWELL(dev_priv)) && + if ((DISPLAY_VER(display) >= 8 || display->platform.haswell) && !intel_bios_encoder_supports_dp_dual_mode(encoder->devdata)) { drm_dbg_kms(display->drm, "Ignoring DP dual mode adaptor max TMDS clock for native HDMI port\n"); @@ -2898,24 +2894,24 @@ static u8 intel_hdmi_default_ddc_pin(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u8 ddc_pin; - if (IS_ALDERLAKE_S(dev_priv)) + if (display->platform.alderlake_s) ddc_pin = adls_encoder_to_ddc_pin(encoder); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) ddc_pin = dg1_encoder_to_ddc_pin(encoder); - else if (IS_ROCKETLAKE(dev_priv)) + else if (display->platform.rocketlake) ddc_pin = rkl_encoder_to_ddc_pin(encoder); else if (DISPLAY_VER(display) == 9 && HAS_PCH_TGP(dev_priv)) ddc_pin = gen9bc_tgp_encoder_to_ddc_pin(encoder); - else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) && + else if ((display->platform.jasperlake || display->platform.elkhartlake) && HAS_PCH_TGP(dev_priv)) ddc_pin = mcc_encoder_to_ddc_pin(encoder); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) ddc_pin = icl_encoder_to_ddc_pin(encoder); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_encoder_to_ddc_pin(encoder); - else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) + else if (display->platform.geminilake || display->platform.broxton) 
ddc_pin = bxt_encoder_to_ddc_pin(encoder); - else if (IS_CHERRYVIEW(dev_priv)) + else if (display->platform.cherryview) ddc_pin = chv_encoder_to_ddc_pin(encoder); else ddc_pin = g4x_encoder_to_ddc_pin(encoder); @@ -2992,12 +2988,12 @@ void intel_infoframe_init(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + if (display->platform.valleyview || display->platform.cherryview) { dig_port->write_infoframe = vlv_write_infoframe; dig_port->read_infoframe = vlv_read_infoframe; dig_port->set_infoframes = vlv_set_infoframes; dig_port->infoframes_enabled = vlv_infoframes_enabled; - } else if (IS_G4X(dev_priv)) { + } else if (display->platform.g4x) { dig_port->write_infoframe = g4x_write_infoframe; dig_port->read_infoframe = g4x_read_infoframe; dig_port->set_infoframes = g4x_set_infoframes; -- GitLab From 69cb72d3938f9bf7299190408ae81c655d8ee247 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:53 +0300 Subject: [PATCH 300/899] drm/i915/display: switch to display->platform.dgfx from IS_DGFX() Prefer display->platform.dgfx based platform detection over the old IS_DGFX() macro. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/99de7f8f26156afbddcdac850088e6a96d322c55.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 4 ++-- drivers/gpu/drm/i915/display/intel_plane_initial.c | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index fabfcf2caa69e..8fe854a452437 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3126,7 +3126,7 @@ static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display * If the OpRegion does not have VBT, look in SPI flash * through MMIO or PCI mapping */ - if (!vbt && IS_DGFX(i915)) + if (!vbt && display->platform.dgfx) with_intel_display_rpm(display) vbt = oprom_get_vbt(display, intel_rom_spi(i915), sizep, "SPI flash"); diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 2b0e0f220442f..05393bd60c989 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -560,11 +560,11 @@ static bool plane_has_modifier(struct intel_display *display, return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && - (GRAPHICS_VER(i915) < 20 || !IS_DGFX(i915))) + (GRAPHICS_VER(i915) < 20 || !display->platform.dgfx)) return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_LNL_CCS && - (GRAPHICS_VER(i915) < 20 || IS_DGFX(i915))) + (GRAPHICS_VER(i915) < 20 || display->platform.dgfx)) return false; return true; diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index b0c4892775ce6..c00d9184c5866 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -53,9 +53,11 @@ intel_reuse_initial_plane_obj(struct 
intel_crtc *this, } static enum intel_memory_type -initial_plane_memory_type(struct drm_i915_private *i915) +initial_plane_memory_type(struct intel_display *display) { - if (IS_DGFX(i915)) + struct drm_i915_private *i915 = to_i915(display->drm); + + if (display->platform.dgfx) return INTEL_MEMORY_LOCAL; else if (HAS_LMEMBAR_SMEM_STOLEN(i915)) return INTEL_MEMORY_STOLEN_LOCAL; @@ -75,7 +77,7 @@ initial_plane_phys(struct intel_display *display, dma_addr_t dma_addr; u32 base; - mem_type = initial_plane_memory_type(i915); + mem_type = initial_plane_memory_type(display); mem = intel_memory_region_by_type(i915, mem_type); if (!mem) { drm_dbg_kms(display->drm, -- GitLab From a8c2c8d329539e9a5f03f3163ce86a56e4651487 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:54 +0300 Subject: [PATCH 301/899] drm/i915/audio: don't set LPE audio irq chip data, it's unused Nobody uses the irq chip data. Stop setting it, and as a bonus get rid of another struct drm_i915_private * reference. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/d75ec986093c912de67a42782aa5a49357a9f8e5.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_lpe_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 59551c8414c2d..666148a145229 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -179,7 +179,7 @@ static int lpe_audio_irq_init(struct intel_display *display) handle_simple_irq, "hdmi_lpe_audio_irq_handler"); - return irq_set_chip_data(irq, dev_priv); + return 0; } static bool lpe_audio_detect(struct intel_display *display) -- GitLab From 1832fd2bdbf39388e642a138c1448ed147d36641 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 9 Apr 2025 21:17:55 +0300 Subject: [PATCH 302/899] drm/xe/compat: clean up unused platform check macros Clean up unused platform check macros from compat i915_drv.h. Display no longer uses any of the IS_*() platform checks. The remaining users are part of the soc/ code. Note that in a comment. 
Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/2f09b3c60223d9426049a28d3d06a3ec2c6ec348.1744222449.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/xe/compat-i915-headers/i915_drv.h | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index f89bd5e3520dc..dd36f9b06b89e 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -21,28 +21,12 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } +/* compat platform checks only for soc/ usage */ #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) #define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) @@ -70,28 +54,10 @@ static inline struct drm_i915_private *to_i915(const struct drm_device 
*dev) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) #ifdef CONFIG_ARM64 -- GitLab From cb271c2edfd0ab7204d5ef3c9d5ae9a0710f5bf2 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:36:56 +0200 Subject: [PATCH 303/899] rust: device: implement impl_device_context_deref! The Deref hierarchy for device context generics is the same for every (bus specific) device. Implement those with a generic macro to avoid duplicated boiler plate code and ensure the correct Deref hierarchy for every device implementation. Co-developed-by: Benno Lossin Signed-off-by: Benno Lossin Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/r/20250413173758.12068-2-dakr@kernel.org [ Add missing `::` prefix in macros. 
- Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 44 +++++++++++++++++++++++++++++++++++++++++ rust/kernel/pci.rs | 16 +++------------ rust/kernel/platform.rs | 17 +++------------- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 21b343a1dc4d2..9520e054996d6 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -235,6 +235,50 @@ mod private { impl DeviceContext for Core {} impl DeviceContext for Normal {} +/// # Safety +/// +/// The type given as `$device` must be a transparent wrapper of a type that doesn't depend on the +/// generic argument of `$device`. +#[doc(hidden)] +#[macro_export] +macro_rules! __impl_device_context_deref { + (unsafe { $device:ident, $src:ty => $dst:ty }) => { + impl ::core::ops::Deref for $device<$src> { + type Target = $device<$dst>; + + fn deref(&self) -> &Self::Target { + let ptr: *const Self = self; + + // CAST: `$device<$src>` and `$device<$dst>` transparently wrap the same type by the + // safety requirement of the macro. + let ptr = ptr.cast::(); + + // SAFETY: `ptr` was derived from `&self`. + unsafe { &*ptr } + } + } + }; +} + +/// Implement [`core::ops::Deref`] traits for allowed [`DeviceContext`] conversions of a (bus +/// specific) device. +/// +/// # Safety +/// +/// The type given as `$device` must be a transparent wrapper of a type that doesn't depend on the +/// generic argument of `$device`. +#[macro_export] +macro_rules! impl_device_context_deref { + (unsafe { $device:ident }) => { + // SAFETY: This macro has the exact same safety requirement as + // `__impl_device_context_deref!`. + ::kernel::__impl_device_context_deref!(unsafe { + $device, + $crate::device::Core => $crate::device::Normal + }); + }; +} + #[doc(hidden)] #[macro_export] macro_rules! 
dev_printk { diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index c97d6d470b282..8474608e7a90a 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -422,19 +422,9 @@ impl Device { } } -impl Deref for Device { - type Target = Device; - - fn deref(&self) -> &Self::Target { - let ptr: *const Self = self; - - // CAST: `Device` is a transparent wrapper of `Opaque`. - let ptr = ptr.cast::(); - - // SAFETY: `ptr` was derived from `&self`. - unsafe { &*ptr } - } -} +// SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic +// argument. +kernel::impl_device_context_deref!(unsafe { Device }); impl From<&Device> for ARef { fn from(dev: &Device) -> Self { diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 4917cb34e2fe8..22590bdff7bbf 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -16,7 +16,6 @@ use crate::{ use core::{ marker::PhantomData, - ops::Deref, ptr::{addr_of_mut, NonNull}, }; @@ -190,19 +189,9 @@ impl Device { } } -impl Deref for Device { - type Target = Device; - - fn deref(&self) -> &Self::Target { - let ptr: *const Self = self; - - // CAST: `Device` is a transparent wrapper of `Opaque`. - let ptr = ptr.cast::(); - - // SAFETY: `ptr` was derived from `&self`. - unsafe { &*ptr } - } -} +// SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic +// argument. +kernel::impl_device_context_deref!(unsafe { Device }); impl From<&Device> for ARef { fn from(dev: &Device) -> Self { -- GitLab From fbb92b6a534081cabd75861ac9c7a8d29d8effda Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:36:57 +0200 Subject: [PATCH 304/899] rust: device: implement impl_device_context_into_aref! Implement a macro to implement all From conversions of a certain device to ARef. This avoids unnecessary boiler plate code for every device implementation. 
Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-3-dakr@kernel.org [ Add missing `::` prefix in macros. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 21 +++++++++++++++++++++ rust/kernel/pci.rs | 7 +------ rust/kernel/platform.rs | 9 ++------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 9520e054996d6..7b96c99879a62 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -279,6 +279,27 @@ macro_rules! impl_device_context_deref { }; } +#[doc(hidden)] +#[macro_export] +macro_rules! __impl_device_context_into_aref { + ($src:ty, $device:tt) => { + impl ::core::convert::From<&$device<$src>> for $crate::types::ARef<$device> { + fn from(dev: &$device<$src>) -> Self { + (&**dev).into() + } + } + }; +} + +/// Implement [`core::convert::From`], such that all `&Device` can be converted to an +/// `ARef`. +#[macro_export] +macro_rules! impl_device_context_into_aref { + ($device:tt) => { + ::kernel::__impl_device_context_into_aref!($crate::device::Core, $device); + }; +} + #[doc(hidden)] #[macro_export] macro_rules! dev_printk { diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 8474608e7a90a..7c6ec05383e0d 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -425,12 +425,7 @@ impl Device { // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic // argument. kernel::impl_device_context_deref!(unsafe { Device }); - -impl From<&Device> for ARef { - fn from(dev: &Device) -> Self { - (&**dev).into() - } -} +kernel::impl_device_context_into_aref!(Device); // SAFETY: Instances of `Device` are always reference-counted. 
unsafe impl crate::types::AlwaysRefCounted for Device { diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 22590bdff7bbf..9476b717c4255 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -10,7 +10,7 @@ use crate::{ of, prelude::*, str::CStr, - types::{ARef, ForeignOwnable, Opaque}, + types::{ForeignOwnable, Opaque}, ThisModule, }; @@ -192,12 +192,7 @@ impl Device { // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic // argument. kernel::impl_device_context_deref!(unsafe { Device }); - -impl From<&Device> for ARef { - fn from(dev: &Device) -> Self { - (&**dev).into() - } -} +kernel::impl_device_context_into_aref!(Device); // SAFETY: Instances of `Device` are always reference-counted. unsafe impl crate::types::AlwaysRefCounted for Device { -- GitLab From d32e4c24a7fe01195ba427c67fc15864279a3c0d Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:36:58 +0200 Subject: [PATCH 305/899] rust: device: implement device context for Device Analogous to bus specific device, implement the DeviceContext generic for generic devices. This is used for APIs that work with generic devices (such as Devres) to evaluate the device's context. Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 7b96c99879a62..53a13dd4d2d4c 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -9,7 +9,7 @@ use crate::{ str::CStr, types::{ARef, Opaque}, }; -use core::{fmt, ptr}; +use core::{fmt, marker::PhantomData, ptr}; #[cfg(CONFIG_PRINTK)] use crate::c_str; @@ -42,7 +42,7 @@ use crate::c_str; /// `bindings::device::release` is valid to be called from any thread, hence `ARef` can be /// dropped from any thread. 
#[repr(transparent)] -pub struct Device(Opaque); +pub struct Device(Opaque, PhantomData); impl Device { /// Creates a new reference-counted abstraction instance of an existing `struct device` pointer. @@ -59,7 +59,9 @@ impl Device { // SAFETY: By the safety requirements ptr is valid unsafe { Self::as_ref(ptr) }.into() } +} +impl Device { /// Obtain the raw `struct device *`. pub(crate) fn as_raw(&self) -> *mut bindings::device { self.0.get() @@ -189,6 +191,11 @@ impl Device { } } +// SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic +// argument. +kernel::impl_device_context_deref!(unsafe { Device }); +kernel::impl_device_context_into_aref!(Device); + // SAFETY: Instances of `Device` are always reference-counted. unsafe impl crate::types::AlwaysRefCounted for Device { fn inc_ref(&self) { -- GitLab From da6c47c6cb45ffb392e016631c08098ad2a6d418 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:36:59 +0200 Subject: [PATCH 306/899] rust: platform: preserve device context in AsRef Since device::Device has a generic over its context, preserve this device context in AsRef. For instance, when calling AsRef on a platform::Device<Core>, the new AsRef implementation returns a device::Device<Core>.
Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/platform.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 9476b717c4255..b1c48cd95cd69 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -183,7 +183,7 @@ pub struct Device( PhantomData, ); -impl Device { +impl Device { fn as_raw(&self) -> *mut bindings::platform_device { self.0.get() } @@ -207,8 +207,8 @@ unsafe impl crate::types::AlwaysRefCounted for Device { } } -impl AsRef for Device { - fn as_ref(&self) -> &device::Device { +impl AsRef> for Device { + fn as_ref(&self) -> &device::Device { // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid // `struct platform_device`. let dev = unsafe { addr_of_mut!((*self.as_raw()).dev) }; -- GitLab From 3edaefbf2b1beb9ae1cb2a842f455157b951e9f1 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:37:00 +0200 Subject: [PATCH 307/899] rust: pci: preserve device context in AsRef Since device::Device has a generic over its context, preserve this device context in AsRef. For instance, when calling AsRef on a pci::Device<Core>, the new AsRef implementation returns a device::Device<Core>. Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-6-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/pci.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 7c6ec05383e0d..1234b0c4a4035 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -360,11 +360,13 @@ impl Deref for Bar { } } -impl Device { +impl Device { fn as_raw(&self) -> *mut bindings::pci_dev { self.0.get() } +} +impl Device { /// Returns the PCI vendor ID. pub fn vendor_id(&self) -> u16 { // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
@@ -440,8 +442,8 @@ unsafe impl crate::types::AlwaysRefCounted for Device { } } -impl AsRef for Device { - fn as_ref(&self) -> &device::Device { +impl AsRef> for Device { + fn as_ref(&self) -> &device::Device { // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid // `struct pci_dev`. let dev = unsafe { addr_of_mut!((*self.as_raw()).dev) }; -- GitLab From f933b7489ffca25c9a33b65441679ee3d2943024 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:37:01 +0200 Subject: [PATCH 308/899] rust: device: implement Bound device context The Bound device context indicates that a device is bound to a driver. It must be used for APIs that require the device to be bound, such as Devres or dma::CoherentAllocation. Implement Bound and add the corresponding Deref hierarchy, as well as the corresponding ARef conversion for this device context. Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-7-dakr@kernel.org [ Add missing `::` prefix in macros. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 53a13dd4d2d4c..0353c5552769c 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -232,13 +232,19 @@ pub struct Normal; /// any of the bus callbacks, such as `probe()`. pub struct Core; +/// The [`Bound`] context is the context of a bus specific device reference when it is guaranteed to +/// be bound for the duration of its lifetime. +pub struct Bound; + mod private { pub trait Sealed {} + impl Sealed for super::Bound {} impl Sealed for super::Core {} impl Sealed for super::Normal {} } +impl DeviceContext for Bound {} impl DeviceContext for Core {} impl DeviceContext for Normal {} @@ -281,7 +287,14 @@ macro_rules! impl_device_context_deref { // `__impl_device_context_deref!`. 
::kernel::__impl_device_context_deref!(unsafe { $device, - $crate::device::Core => $crate::device::Normal + $crate::device::Core => $crate::device::Bound + }); + + // SAFETY: This macro has the exact same safety requirement as + // `__impl_device_context_deref!`. + ::kernel::__impl_device_context_deref!(unsafe { + $device, + $crate::device::Bound => $crate::device::Normal }); }; } @@ -304,6 +317,7 @@ macro_rules! __impl_device_context_into_aref { macro_rules! impl_device_context_into_aref { ($device:tt) => { ::kernel::__impl_device_context_into_aref!($crate::device::Core, $device); + ::kernel::__impl_device_context_into_aref!($crate::device::Bound, $device); }; } -- GitLab From f2a399d7b67c4a6fc0f8e59d1a9eb484efc71b5c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:37:02 +0200 Subject: [PATCH 309/899] rust: pci: move iomap_region() to impl Device Require the Bound device context to be able to call iomap_region() and iomap_region_sized(). Creating I/O mapping requires the device to be bound. Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250413173758.12068-8-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/pci.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 1234b0c4a4035..3664d35b8e796 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -390,7 +390,9 @@ impl Device { // - by its type invariant `self.as_raw` is always a valid pointer to a `struct pci_dev`. Ok(unsafe { bindings::pci_resource_len(self.as_raw(), bar.try_into()?) }) } +} +impl Device { /// Mapps an entire PCI-BAR after performing a region-request on it. I/O operation bound checks /// can be performed on compile time for offsets (plus the requested type size) < SIZE. 
pub fn iomap_region_sized( -- GitLab From f720efda2db5e609b32100c25d9cf383f082d945 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:37:03 +0200 Subject: [PATCH 310/899] rust: devres: require a bound device Require the Bound device context to be able to create a new Devres container. This ensures that we can't register devres callbacks for unbound devices. Link: https://lore.kernel.org/r/20250413173758.12068-9-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index ddb1ce4a78d94..1e58f5d220441 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -8,7 +8,7 @@ use crate::{ alloc::Flags, bindings, - device::Device, + device::{Bound, Device}, error::{Error, Result}, ffi::c_void, prelude::*, @@ -45,7 +45,7 @@ struct DevresInner { /// # Example /// /// ```no_run -/// # use kernel::{bindings, c_str, device::Device, devres::Devres, io::{Io, IoRaw}}; +/// # use kernel::{bindings, c_str, device::{Bound, Device}, devres::Devres, io::{Io, IoRaw}}; /// # use core::ops::Deref; /// /// // See also [`pci::Bar`] for a real example. @@ -83,13 +83,10 @@ struct DevresInner { /// unsafe { Io::from_raw(&self.0) } /// } /// } -/// # fn no_run() -> Result<(), Error> { -/// # // SAFETY: Invalid usage; just for the example to get an `ARef` instance. -/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) }; -/// +/// # fn no_run(dev: &Device) -> Result<(), Error> { /// // SAFETY: Invalid usage for example purposes. /// let iomem = unsafe { IoMem::<{ core::mem::size_of::() }>::new(0xBAAAAAAD)?
}; -/// let devres = Devres::new(&dev, iomem, GFP_KERNEL)?; +/// let devres = Devres::new(dev, iomem, GFP_KERNEL)?; /// /// let res = devres.try_access().ok_or(ENXIO)?; /// res.write8(0x42, 0x0); @@ -99,7 +96,7 @@ struct DevresInner { pub struct Devres(Arc>); impl DevresInner { - fn new(dev: &Device, data: T, flags: Flags) -> Result>> { + fn new(dev: &Device, data: T, flags: Flags) -> Result>> { let inner = Arc::pin_init( pin_init!( DevresInner { dev: dev.into(), @@ -171,7 +168,7 @@ impl DevresInner { impl Devres { /// Creates a new [`Devres`] instance of the given `data`. The `data` encapsulated within the /// returned `Devres` instance' `data` will be revoked once the device is detached. - pub fn new(dev: &Device, data: T, flags: Flags) -> Result { + pub fn new(dev: &Device, data: T, flags: Flags) -> Result { let inner = DevresInner::new(dev, data, flags)?; Ok(Devres(inner)) @@ -179,7 +176,7 @@ impl Devres { /// Same as [`Devres::new`], but does not return a `Devres` instance. Instead the given `data` /// is owned by devres and will be revoked / dropped, once the device is detached. - pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result { + pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result { let _ = DevresInner::new(dev, data, flags)?; Ok(()) -- GitLab From 7bd1710aac0571d4722f1429a9332c57b3a8feaf Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 13 Apr 2025 19:37:04 +0200 Subject: [PATCH 311/899] rust: dma: require a bound device Require the Bound device context to be able to create new dma::CoherentAllocation instances. DMA memory allocations are only valid to be created for bound devices. 
Link: https://lore.kernel.org/r/20250413173758.12068-10-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 8cdc76043ee77..605e01e357156 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -6,7 +6,7 @@ use crate::{ bindings, build_assert, - device::Device, + device::{Bound, Device}, error::code::*, error::Result, transmute::{AsBytes, FromBytes}, @@ -22,10 +22,10 @@ use crate::{ /// # Examples /// /// ``` -/// use kernel::device::Device; +/// # use kernel::device::{Bound, Device}; /// use kernel::dma::{attrs::*, CoherentAllocation}; /// -/// # fn test(dev: &Device) -> Result { +/// # fn test(dev: &Device) -> Result { /// let attribs = DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_WARN; /// let c: CoherentAllocation = /// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, attribs)?; @@ -143,16 +143,16 @@ impl CoherentAllocation { /// # Examples /// /// ``` - /// use kernel::device::Device; + /// # use kernel::device::{Bound, Device}; /// use kernel::dma::{attrs::*, CoherentAllocation}; /// - /// # fn test(dev: &Device) -> Result { + /// # fn test(dev: &Device) -> Result { /// let c: CoherentAllocation = /// CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?; /// # Ok::<(), Error>(()) } /// ``` pub fn alloc_attrs( - dev: &Device, + dev: &Device, count: usize, gfp_flags: kernel::alloc::Flags, dma_attrs: Attrs, @@ -194,7 +194,7 @@ impl CoherentAllocation { /// Performs the same functionality as [`CoherentAllocation::alloc_attrs`], except the /// `dma_attrs` is 0 by default. 
pub fn alloc_coherent( - dev: &Device, + dev: &Device, count: usize, gfp_flags: kernel::alloc::Flags, ) -> Result> { -- GitLab From 1e914a89ab7eef0b3890819847465f4c8c9d091d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:02 +0200 Subject: [PATCH 312/899] drm/mediatek: mtk_cec: Switch to register as module_platform_driver In preparation for adding a driver for the new HDMIv2 IP, and before splitting out the common bits from the HDMI driver, change the mtk_cec driver from being registered from the mtk_hdmi driver itself to be a module_platform_driver of its own. Besides being a cleanup, this also allows build flexibility by allowing to compile the CECv1 driver only when needed (for example, this is not needed nor used in HDMIv2 configurations). Reviewed-by: Alexandre Mergnat Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-5-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/Makefile | 4 ++-- drivers/gpu/drm/mediatek/mtk_cec.c | 7 ++++++- drivers/gpu/drm/mediatek/mtk_hdmi.c | 2 +- drivers/gpu/drm/mediatek/mtk_hdmi.h | 1 - 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index 32a2ed6c0cfef..bdd3a062f797a 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -21,10 +21,10 @@ mediatek-drm-y := mtk_crtc.o \ obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o -mediatek-drm-hdmi-objs := mtk_cec.o \ - mtk_hdmi.o \ +mediatek-drm-hdmi-objs := mtk_hdmi.o \ mtk_hdmi_ddc.o +obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mtk_cec.o obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mediatek-drm-hdmi.o obj-$(CONFIG_DRM_MEDIATEK_DP) += mtk_dp.o diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index b42c0d87eba3f..c7be530ca041f 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ 
b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -12,7 +12,6 @@ #include #include "mtk_cec.h" -#include "mtk_hdmi.h" #include "mtk_drm_drv.h" #define TR_CONFIG 0x00 @@ -102,6 +101,7 @@ void mtk_cec_set_hpd_event(struct device *dev, cec->hpd_event = hpd_event; spin_unlock_irqrestore(&cec->lock, flags); } +EXPORT_SYMBOL_NS_GPL(mtk_cec_set_hpd_event, "DRM_MTK_HDMI_V1"); bool mtk_cec_hpd_high(struct device *dev) { @@ -112,6 +112,7 @@ bool mtk_cec_hpd_high(struct device *dev) return (status & (HDMI_PORD | HDMI_HTPLG)) == (HDMI_PORD | HDMI_HTPLG); } +EXPORT_SYMBOL_NS_GPL(mtk_cec_hpd_high, "DRM_MTK_HDMI_V1"); static void mtk_cec_htplg_irq_init(struct mtk_cec *cec) { @@ -247,3 +248,7 @@ struct platform_driver mtk_cec_driver = { .of_match_table = mtk_cec_of_ids, }, }; +module_platform_driver(mtk_cec_driver); + +MODULE_DESCRIPTION("MediaTek HDMI CEC Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 1358a5095fde4..f4476a12055bd 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1788,7 +1788,6 @@ static struct platform_driver mtk_hdmi_driver = { static struct platform_driver * const mtk_hdmi_drivers[] = { &mtk_hdmi_ddc_driver, - &mtk_cec_driver, &mtk_hdmi_driver, }; @@ -1810,3 +1809,4 @@ module_exit(mtk_hdmitx_exit); MODULE_AUTHOR("Jie Qiu "); MODULE_DESCRIPTION("MediaTek HDMI Driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS("DRM_MTK_HDMI_V1"); diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.h b/drivers/gpu/drm/mediatek/mtk_hdmi.h index 472bf141c92bb..e40bc46519950 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.h @@ -8,7 +8,6 @@ struct platform_driver; -extern struct platform_driver mtk_cec_driver; extern struct platform_driver mtk_hdmi_ddc_driver; #endif /* _MTK_HDMI_CTRL_H */ -- GitLab From a095d0d1e4849ee25c28d43d713a8f433d7f1f27 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 21 Mar 2025 22:47:54 +0100 
Subject: [PATCH 313/899] rust: pci: impl TryFrom<&Device> for &pci::Device Implement TryFrom<&device::Device> for &Device. This allows us to get a &pci::Device from a generic &Device in a safe way; the conversion fails if the device' bus type does not match with the PCI bus type. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250321214826.140946-3-dakr@kernel.org [ Support device context types, use dev_is_pci() helper. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/helpers/pci.c | 5 +++++ rust/kernel/pci.rs | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/rust/helpers/pci.c b/rust/helpers/pci.c index 8ba22f9114591..cd0e6bf2cc4d9 100644 --- a/rust/helpers/pci.c +++ b/rust/helpers/pci.c @@ -16,3 +16,8 @@ resource_size_t rust_helper_pci_resource_len(struct pci_dev *pdev, int bar) { return pci_resource_len(pdev, bar); } + +bool rust_helper_dev_is_pci(const struct device *dev) +{ + return dev_is_pci(dev); +} diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 3664d35b8e796..38fc8d5ffbf9d 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -6,7 +6,7 @@ use crate::{ alloc::flags::*, - bindings, device, + bindings, container_of, device, device_id::RawDeviceId, devres::Devres, driver, @@ -455,6 +455,26 @@ impl AsRef> for Device { } } +impl TryFrom<&device::Device> for &Device { + type Error = kernel::error::Error; + + fn try_from(dev: &device::Device) -> Result { + // SAFETY: By the type invariant of `Device`, `dev.as_raw()` is a valid pointer to a + // `struct device`. + if !unsafe { bindings::dev_is_pci(dev.as_raw()) } { + return Err(EINVAL); + } + + // SAFETY: We've just verified that the bus type of `dev` equals `bindings::pci_bus_type`, + // hence `dev` must be embedded in a valid `struct pci_dev` as guaranteed by the + // corresponding C code. 
+ let pdev = unsafe { container_of!(dev.as_raw(), bindings::pci_dev, dev) }; + + // SAFETY: `pdev` is a valid pointer to a `struct pci_dev`. + Ok(unsafe { &*pdev.cast() }) + } +} + // SAFETY: A `Device` is always reference-counted and can be released from any thread. unsafe impl Send for Device {} -- GitLab From a38dfd60fe53a46a93517f02a061bb34c47946dc Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 21 Mar 2025 22:47:55 +0100 Subject: [PATCH 314/899] rust: platform: impl TryFrom<&Device> for &platform::Device Implement TryFrom<&device::Device> for &Device. This allows us to get a &platform::Device from a generic &Device in a safe way; the conversion fails if the device' bus type does not match with the platform bus type. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250321214826.140946-4-dakr@kernel.org [ Support device context types, use dev_is_platform() helper. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/helpers/platform.c | 5 +++++ rust/kernel/platform.rs | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/rust/helpers/platform.c b/rust/helpers/platform.c index ab9b9f3173014..82171233d12fe 100644 --- a/rust/helpers/platform.c +++ b/rust/helpers/platform.c @@ -11,3 +11,8 @@ void rust_helper_platform_set_drvdata(struct platform_device *pdev, void *data) { platform_set_drvdata(pdev, data); } + +bool rust_helper_dev_is_platform(const struct device *dev) +{ + return dev_is_platform(dev); +} diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index b1c48cd95cd69..08849d92c0741 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -5,7 +5,7 @@ //! 
C header: [`include/linux/platform_device.h`](srctree/include/linux/platform_device.h) use crate::{ - bindings, device, driver, + bindings, container_of, device, driver, error::{to_result, Result}, of, prelude::*, @@ -218,6 +218,26 @@ impl AsRef> for Device { } } +impl TryFrom<&device::Device> for &Device { + type Error = kernel::error::Error; + + fn try_from(dev: &device::Device) -> Result { + // SAFETY: By the type invariant of `Device`, `dev.as_raw()` is a valid pointer to a + // `struct device`. + if !unsafe { bindings::dev_is_platform(dev.as_raw()) } { + return Err(EINVAL); + } + + // SAFETY: We've just verified that the bus type of `dev` equals + // `bindings::platform_bus_type`, hence `dev` must be embedded in a valid + // `struct platform_device` as guaranteed by the corresponding C code. + let pdev = unsafe { container_of!(dev.as_raw(), bindings::platform_device, dev) }; + + // SAFETY: `pdev` is a valid pointer to a `struct platform_device`. + Ok(unsafe { &*pdev.cast() }) + } +} + // SAFETY: A `Device` is always reference-counted and can be released from any thread. unsafe impl Send for Device {} -- GitLab From 9647b6c5095aa54701df009a7335d07013cd13c4 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 14 Apr 2025 15:18:04 +0200 Subject: [PATCH 315/899] rust: types: add `Opaque::zeroed` Analogous to `Opaque::uninit` add `Opaque::zeroed`, which sets the corresponding memory to zero. In contrast to `Opaque::uninit`, the corresponding value, depending on its type, may be initialized. 
Acked-by: Greg Kroah-Hartman Reviewed-by: Alice Ryhl Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250414131934.28418-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/types.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 9d0471afc9648..eee387727d1ae 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -329,6 +329,14 @@ impl Opaque { } } + /// Creates a new zeroed opaque value. + pub const fn zeroed() -> Self { + Self { + value: UnsafeCell::new(MaybeUninit::zeroed()), + _pin: PhantomPinned, + } + } + /// Create an opaque pin-initializer from the given pin-initializer. pub fn pin_init(slot: impl PinInit) -> impl PinInit { Self::ffi_init(|ptr: *mut T| { -- GitLab From a4c9f71e3440dc6e944fa05bb7fcb3949fe5bcd4 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 14 Apr 2025 15:18:05 +0200 Subject: [PATCH 316/899] rust: device: implement Device::parent() Device::parent() returns a reference to the device' parent device, if any. Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250414131934.28418-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 0353c5552769c..0d237cebd65eb 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -67,6 +67,25 @@ impl Device { self.0.get() } + /// Returns a reference to the parent device, if any. + #[expect(unused)] + pub(crate) fn parent(&self) -> Option<&Self> { + // SAFETY: + // - By the type invariant `self.as_raw()` is always valid. + // - The parent device is only ever set at device creation. + let parent = unsafe { (*self.as_raw()).parent }; + + if parent.is_null() { + None + } else { + // SAFETY: + // - Since `parent` is not NULL, it must be a valid pointer to a `struct device`. 
+ // - `parent` is valid for the lifetime of `self`, since a `struct device` holds a + // reference count of its parent. + Some(unsafe { Self::as_ref(parent) }) + } + } + /// Convert a raw C `struct device` pointer to a `&'a Device`. /// /// # Safety -- GitLab From ce735e73dd59b169b877cedd0753297c81c2a091 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 14 Apr 2025 15:18:06 +0200 Subject: [PATCH 317/899] rust: auxiliary: add auxiliary device / driver abstractions Implement the basic auxiliary abstractions required to implement a driver matching an auxiliary device. The design and implementation is analogous to PCI and platform and is based on the generic device / driver abstractions. Acked-by: Greg Kroah-Hartman Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250414131934.28418-4-dakr@kernel.org [ Fix typos, `let _ =` => `drop()`, use `kernel::ffi`. - Danilo ] Signed-off-by: Danilo Krummrich --- MAINTAINERS | 2 + rust/bindings/bindings_helper.h | 1 + rust/helpers/auxiliary.c | 23 +++ rust/helpers/helpers.c | 1 + rust/kernel/auxiliary.rs | 274 ++++++++++++++++++++++++++++++++ rust/kernel/device.rs | 1 - rust/kernel/lib.rs | 2 + 7 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 rust/helpers/auxiliary.c create mode 100644 rust/kernel/auxiliary.rs diff --git a/MAINTAINERS b/MAINTAINERS index 96b8270495018..5a19522931192 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3872,6 +3872,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h +F: rust/helpers/auxiliary.c +F: rust/kernel/auxiliary.rs AUXILIARY DISPLAY DRIVERS M: Andy Shevchenko diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index ab37e1d35c70d..8a2add69e5d66 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff 
--git a/rust/helpers/auxiliary.c b/rust/helpers/auxiliary.c new file mode 100644 index 0000000000000..0db3860d774ec --- /dev/null +++ b/rust/helpers/auxiliary.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +void rust_helper_auxiliary_set_drvdata(struct auxiliary_device *adev, void *data) +{ + auxiliary_set_drvdata(adev, data); +} + +void *rust_helper_auxiliary_get_drvdata(struct auxiliary_device *adev) +{ + return auxiliary_get_drvdata(adev); +} + +void rust_helper_auxiliary_device_uninit(struct auxiliary_device *adev) +{ + return auxiliary_device_uninit(adev); +} + +void rust_helper_auxiliary_device_delete(struct auxiliary_device *adev) +{ + return auxiliary_device_delete(adev); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index e1c21eba9b15b..6b279279cb129 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -7,6 +7,7 @@ * Sorted alphabetically. */ +#include "auxiliary.c" #include "blk.c" #include "bug.c" #include "build_assert.c" diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs new file mode 100644 index 0000000000000..e15596263c224 --- /dev/null +++ b/rust/kernel/auxiliary.rs @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Abstractions for the auxiliary bus. +//! +//! C header: [`include/linux/auxiliary_bus.h`](srctree/include/linux/auxiliary_bus.h) + +use crate::{ + bindings, device, + device_id::RawDeviceId, + driver, + error::{to_result, Result}, + prelude::*, + str::CStr, + types::{ForeignOwnable, Opaque}, + ThisModule, +}; +use core::{ + marker::PhantomData, + ptr::{addr_of_mut, NonNull}, +}; + +/// An adapter for the registration of auxiliary drivers. +pub struct Adapter(T); + +// SAFETY: A call to `unregister` for a given instance of `RegType` is guaranteed to be valid if +// a preceding call to `register` has been successful. 
+unsafe impl driver::RegistrationOps for Adapter { + type RegType = bindings::auxiliary_driver; + + unsafe fn register( + adrv: &Opaque, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result { + // SAFETY: It's safe to set the fields of `struct auxiliary_driver` on initialization. + unsafe { + (*adrv.get()).name = name.as_char_ptr(); + (*adrv.get()).probe = Some(Self::probe_callback); + (*adrv.get()).remove = Some(Self::remove_callback); + (*adrv.get()).id_table = T::ID_TABLE.as_ptr(); + } + + // SAFETY: `adrv` is guaranteed to be a valid `RegType`. + to_result(unsafe { + bindings::__auxiliary_driver_register(adrv.get(), module.0, name.as_char_ptr()) + }) + } + + unsafe fn unregister(adrv: &Opaque) { + // SAFETY: `adrv` is guaranteed to be a valid `RegType`. + unsafe { bindings::auxiliary_driver_unregister(adrv.get()) } + } +} + +impl Adapter { + extern "C" fn probe_callback( + adev: *mut bindings::auxiliary_device, + id: *const bindings::auxiliary_device_id, + ) -> kernel::ffi::c_int { + // SAFETY: The auxiliary bus only ever calls the probe callback with a valid pointer to a + // `struct auxiliary_device`. + // + // INVARIANT: `adev` is valid for the duration of `probe_callback()`. + let adev = unsafe { &*adev.cast::>() }; + + // SAFETY: `DeviceId` is a `#[repr(transparent)`] wrapper of `struct auxiliary_device_id` + // and does not add additional invariants, so it's safe to transmute. + let id = unsafe { &*id.cast::() }; + let info = T::ID_TABLE.info(id.index()); + + match T::probe(adev, info) { + Ok(data) => { + // Let the `struct auxiliary_device` own a reference of the driver's private data. + // SAFETY: By the type invariant `adev.as_raw` returns a valid pointer to a + // `struct auxiliary_device`. 
+ unsafe { bindings::auxiliary_set_drvdata(adev.as_raw(), data.into_foreign()) }; + } + Err(err) => return Error::to_errno(err), + } + + 0 + } + + extern "C" fn remove_callback(adev: *mut bindings::auxiliary_device) { + // SAFETY: The auxiliary bus only ever calls the remove callback with a valid pointer to a + // `struct auxiliary_device`. + let ptr = unsafe { bindings::auxiliary_get_drvdata(adev) }; + + // SAFETY: `remove_callback` is only ever called after a successful call to + // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized + // `KBox` pointer created through `KBox::into_foreign`. + drop(unsafe { KBox::::from_foreign(ptr) }); + } +} + +/// Declares a kernel module that exposes a single auxiliary driver. +#[macro_export] +macro_rules! module_auxiliary_driver { + ($($f:tt)*) => { + $crate::module_driver!(, $crate::auxiliary::Adapter, { $($f)* }); + }; +} + +/// Abstraction for `bindings::auxiliary_device_id`. +#[repr(transparent)] +#[derive(Clone, Copy)] +pub struct DeviceId(bindings::auxiliary_device_id); + +impl DeviceId { + /// Create a new [`DeviceId`] from name. + pub const fn new(modname: &'static CStr, name: &'static CStr) -> Self { + let name = name.as_bytes_with_nul(); + let modname = modname.as_bytes_with_nul(); + + // TODO: Replace with `bindings::auxiliary_device_id::default()` once stabilized for + // `const`. + // + // SAFETY: FFI type is valid to be zero-initialized. + let mut id: bindings::auxiliary_device_id = unsafe { core::mem::zeroed() }; + + let mut i = 0; + while i < modname.len() { + id.name[i] = modname[i]; + i += 1; + } + + // Reuse the space of the NULL terminator. + id.name[i - 1] = b'.'; + + let mut j = 0; + while j < name.len() { + id.name[i] = name[j]; + i += 1; + j += 1; + } + + Self(id) + } +} + +// SAFETY: +// * `DeviceId` is a `#[repr(transparent)`] wrapper of `auxiliary_device_id` and does not add +// additional invariants, so it's safe to transmute to `RawType`. 
+// * `DRIVER_DATA_OFFSET` is the offset to the `driver_data` field. +unsafe impl RawDeviceId for DeviceId { + type RawType = bindings::auxiliary_device_id; + + const DRIVER_DATA_OFFSET: usize = + core::mem::offset_of!(bindings::auxiliary_device_id, driver_data); + + fn index(&self) -> usize { + self.0.driver_data + } +} + +/// IdTable type for auxiliary drivers. +pub type IdTable = &'static dyn kernel::device_id::IdTable; + +/// Create an auxiliary `IdTable` with its alias for modpost. +#[macro_export] +macro_rules! auxiliary_device_table { + ($table_name:ident, $module_table_name:ident, $id_info_type: ty, $table_data: expr) => { + const $table_name: $crate::device_id::IdArray< + $crate::auxiliary::DeviceId, + $id_info_type, + { $table_data.len() }, + > = $crate::device_id::IdArray::new($table_data); + + $crate::module_device_table!("auxiliary", $module_table_name, $table_name); + }; +} + +/// The auxiliary driver trait. +/// +/// Drivers must implement this trait in order to get an auxiliary driver registered. +pub trait Driver { + /// The type holding information about each device id supported by the driver. + /// + /// TODO: Use associated_type_defaults once stabilized: + /// + /// type IdInfo: 'static = (); + type IdInfo: 'static; + + /// The table of device ids supported by the driver. + const ID_TABLE: IdTable; + + /// Auxiliary driver probe. + /// + /// Called when an auxiliary device matches a corresponding driver. + fn probe(dev: &Device, id_info: &Self::IdInfo) -> Result>>; +} + +/// The auxiliary device representation. +/// +/// This structure represents the Rust abstraction for a C `struct auxiliary_device`. The +/// implementation abstracts the usage of an already existing C `struct auxiliary_device` within +/// Rust code that we get passed from the C side. +/// +/// # Invariants +/// +/// A [`Device`] instance represents a valid `struct auxiliary_device` created by the C portion of +/// the kernel. 
+#[repr(transparent)] +pub struct Device( + Opaque, + PhantomData, +); + +impl Device { + fn as_raw(&self) -> *mut bindings::auxiliary_device { + self.0.get() + } + + /// Returns the auxiliary device' id. + pub fn id(&self) -> u32 { + // SAFETY: By the type invariant `self.as_raw()` is a valid pointer to a + // `struct auxiliary_device`. + unsafe { (*self.as_raw()).id } + } + + /// Returns a reference to the parent [`device::Device`], if any. + pub fn parent(&self) -> Option<&device::Device> { + let ptr: *const Self = self; + // CAST: `Device` types are transparent to each other. + let ptr: *const Device = ptr.cast(); + // SAFETY: `ptr` was derived from `&self`. + let this = unsafe { &*ptr }; + + this.as_ref().parent() + } +} + +// SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic +// argument. +kernel::impl_device_context_deref!(unsafe { Device }); +kernel::impl_device_context_into_aref!(Device); + +// SAFETY: Instances of `Device` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Device { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::get_device(self.as_ref().as_raw()) }; + } + + unsafe fn dec_ref(obj: NonNull) { + // CAST: `Self` a transparent wrapper of `bindings::auxiliary_device`. + let adev: *mut bindings::auxiliary_device = obj.cast().as_ptr(); + + // SAFETY: By the type invariant of `Self`, `adev` is a pointer to a valid + // `struct auxiliary_device`. + let dev = unsafe { addr_of_mut!((*adev).dev) }; + + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::put_device(dev) } + } +} + +impl AsRef> for Device { + fn as_ref(&self) -> &device::Device { + // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid + // `struct auxiliary_device`. 
+ let dev = unsafe { addr_of_mut!((*self.as_raw()).dev) }; + + // SAFETY: `dev` points to a valid `struct device`. + unsafe { device::Device::as_ref(dev) } + } +} + +// SAFETY: A `Device` is always reference-counted and can be released from any thread. +unsafe impl Send for Device {} + +// SAFETY: `Device` can be shared among threads because all methods of `Device` +// (i.e. `Device) are thread safe. +unsafe impl Sync for Device {} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 0d237cebd65eb..40c1f549b0bae 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -68,7 +68,6 @@ impl Device { } /// Returns a reference to the parent device, if any. - #[expect(unused)] pub(crate) fn parent(&self) -> Option<&Self> { // SAFETY: // - By the type invariant `self.as_raw()` is always valid. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index de07aadd1ff5f..55a8dfeece0b2 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -38,6 +38,8 @@ extern crate self as kernel; pub use ffi; pub mod alloc; +#[cfg(CONFIG_AUXILIARY_BUS)] +pub mod auxiliary; #[cfg(CONFIG_BLOCK)] pub mod block; #[doc(hidden)] -- GitLab From 0d1803d25f8c7586beb3843c8efbb83f24ce2539 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 14 Apr 2025 15:18:07 +0200 Subject: [PATCH 318/899] rust: auxiliary: add auxiliary registration Implement the `auxiliary::Registration` type, which provides an API to create and register new auxiliary devices in the system. Acked-by: Greg Kroah-Hartman Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250414131934.28418-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/auxiliary.rs | 88 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs index e15596263c224..5c072960dee02 100644 --- a/rust/kernel/auxiliary.rs +++ b/rust/kernel/auxiliary.rs @@ -5,7 +5,7 @@ //! 
C header: [`include/linux/auxiliary_bus.h`](srctree/include/linux/auxiliary_bus.h) use crate::{ - bindings, device, + bindings, container_of, device, device_id::RawDeviceId, driver, error::{to_result, Result}, @@ -230,6 +230,18 @@ impl Device { } } +impl Device { + extern "C" fn release(dev: *mut bindings::device) { + // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` + // embedded in `struct auxiliary_device`. + let adev = unsafe { container_of!(dev, bindings::auxiliary_device, dev) }.cast_mut(); + + // SAFETY: `adev` points to the memory that has been allocated in `Registration::new`, via + // `KBox::new(Opaque::::zeroed(), GFP_KERNEL)`. + let _ = unsafe { KBox::>::from_raw(adev.cast()) }; + } +} + // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic // argument. kernel::impl_device_context_deref!(unsafe { Device }); @@ -272,3 +284,77 @@ unsafe impl Send for Device {} // SAFETY: `Device` can be shared among threads because all methods of `Device` // (i.e. `Device) are thread safe. unsafe impl Sync for Device {} + +/// The registration of an auxiliary device. +/// +/// This type represents the registration of a [`struct auxiliary_device`]. When an instance of this +/// type is dropped, its respective auxiliary device will be unregistered from the system. +/// +/// # Invariants +/// +/// `self.0` always holds a valid pointer to an initialized and registered +/// [`struct auxiliary_device`]. +pub struct Registration(NonNull); + +impl Registration { + /// Create and register a new auxiliary device. + pub fn new(parent: &device::Device, name: &CStr, id: u32, modname: &CStr) -> Result { + let boxed = KBox::new(Opaque::::zeroed(), GFP_KERNEL)?; + let adev = boxed.get(); + + // SAFETY: It's safe to set the fields of `struct auxiliary_device` on initialization. 
+ unsafe { + (*adev).dev.parent = parent.as_raw(); + (*adev).dev.release = Some(Device::release); + (*adev).name = name.as_char_ptr(); + (*adev).id = id; + } + + // SAFETY: `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`, + // which has not been initialized yet. + unsafe { bindings::auxiliary_device_init(adev) }; + + // Now that `adev` is initialized, leak the `Box`; the corresponding memory will be freed + // by `Device::release` when the last reference to the `struct auxiliary_device` is dropped. + let _ = KBox::into_raw(boxed); + + // SAFETY: + // - `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`, which has + // been initialized, + // - `modname.as_char_ptr()` is a NULL terminated string. + let ret = unsafe { bindings::__auxiliary_device_add(adev, modname.as_char_ptr()) }; + if ret != 0 { + // SAFETY: `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`, + // which has been initialized. + unsafe { bindings::auxiliary_device_uninit(adev) }; + + return Err(Error::from_errno(ret)); + } + + // SAFETY: `adev` is guaranteed to be non-null, since the `KBox` was allocated successfully. + // + // INVARIANT: The device will remain registered until `auxiliary_device_delete()` is called, + // which happens in `Self::drop()`. + Ok(Self(unsafe { NonNull::new_unchecked(adev) })) + } +} + +impl Drop for Registration { + fn drop(&mut self) { + // SAFETY: By the type invariant of `Self`, `self.0.as_ptr()` is a valid registered + // `struct auxiliary_device`. + unsafe { bindings::auxiliary_device_delete(self.0.as_ptr()) }; + + // This drops the reference we acquired through `auxiliary_device_init()`. + // + // SAFETY: By the type invariant of `Self`, `self.0.as_ptr()` is a valid registered + // `struct auxiliary_device`. + unsafe { bindings::auxiliary_device_uninit(self.0.as_ptr()) }; + } +} + +// SAFETY: A `Registration` of a `struct auxiliary_device` can be released from any thread. 
+unsafe impl Send for Registration {} + +// SAFETY: `Registration` does not expose any methods or fields that need synchronization. +unsafe impl Sync for Registration {} -- GitLab From 96609a1969f4ade45351ec368c65580c77592e8b Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 14 Apr 2025 15:18:08 +0200 Subject: [PATCH 319/899] samples: rust: add Rust auxiliary driver sample Add a sample Rust auxiliary driver based on a PCI driver for QEMU's "pci-testdev" device. The PCI driver only registers an auxiliary device, in order to make the corresponding auxiliary driver probe. Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250414131934.28418-6-dakr@kernel.org [ Use `ok_or()` when accessing auxiliary::Device::parent(). - Danilo ] Signed-off-by: Danilo Krummrich --- MAINTAINERS | 1 + samples/rust/Kconfig | 12 +++ samples/rust/Makefile | 1 + samples/rust/rust_driver_auxiliary.rs | 120 ++++++++++++++++++++++++++ 4 files changed, 134 insertions(+) create mode 100644 samples/rust/rust_driver_auxiliary.rs diff --git a/MAINTAINERS b/MAINTAINERS index 5a19522931192..34757b8641c33 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3874,6 +3874,7 @@ F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h F: rust/helpers/auxiliary.c F: rust/kernel/auxiliary.rs +F: samples/rust/rust_driver_auxiliary.rs AUXILIARY DISPLAY DRIVERS M: Andy Shevchenko diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig index cad52b7120b51..43cb72d72631b 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -82,6 +82,18 @@ config SAMPLE_RUST_DRIVER_FAUX If unsure, say N. +config SAMPLE_RUST_DRIVER_AUXILIARY + tristate "Auxiliary Driver" + depends on AUXILIARY_BUS + depends on PCI + help + This option builds the Rust auxiliary driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_auxiliary. + + If unsure, say N. 
+ config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index c6a2479f7d9cf..6a466afd2a21e 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_SAMPLE_RUST_DMA) += rust_dma.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_AUXILIARY) += rust_driver_auxiliary.o rust_print-y := rust_print_main.o rust_print_events.o diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs new file mode 100644 index 0000000000000..3e15e6d002bba --- /dev/null +++ b/samples/rust/rust_driver_auxiliary.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust auxiliary driver sample (based on a PCI driver for QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. 
+ +use kernel::{ + auxiliary, bindings, c_str, device::Core, driver, error::Error, pci, prelude::*, str::CStr, + InPlaceModule, +}; + +use pin_init::PinInit; + +const MODULE_NAME: &CStr = ::NAME; +const AUXILIARY_NAME: &CStr = c_str!("auxiliary"); + +struct AuxiliaryDriver; + +kernel::auxiliary_device_table!( + AUX_TABLE, + MODULE_AUX_TABLE, + ::IdInfo, + [(auxiliary::DeviceId::new(MODULE_NAME, AUXILIARY_NAME), ())] +); + +impl auxiliary::Driver for AuxiliaryDriver { + type IdInfo = (); + + const ID_TABLE: auxiliary::IdTable = &AUX_TABLE; + + fn probe(adev: &auxiliary::Device, _info: &Self::IdInfo) -> Result>> { + dev_info!( + adev.as_ref(), + "Probing auxiliary driver for auxiliary device with id={}\n", + adev.id() + ); + + ParentDriver::connect(adev)?; + + let this = KBox::new(Self, GFP_KERNEL)?; + + Ok(this.into()) + } +} + +struct ParentDriver { + _reg: [auxiliary::Registration; 2], +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + ::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x5), + () + )] +); + +impl pci::Driver for ParentDriver { + type IdInfo = (); + + const ID_TABLE: pci::IdTable = &PCI_TABLE; + + fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result>> { + let this = KBox::new( + Self { + _reg: [ + auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 0, MODULE_NAME)?, + auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 1, MODULE_NAME)?, + ], + }, + GFP_KERNEL, + )?; + + Ok(this.into()) + } +} + +impl ParentDriver { + fn connect(adev: &auxiliary::Device) -> Result<()> { + let parent = adev.parent().ok_or(EINVAL)?; + let pdev: &pci::Device = parent.try_into()?; + + dev_info!( + adev.as_ref(), + "Connect auxiliary {} with parent: VendorID={:#x}, DeviceID={:#x}\n", + adev.id(), + pdev.vendor_id(), + pdev.device_id() + ); + + Ok(()) + } +} + +#[pin_data] +struct SampleModule { + #[pin] + _pci_driver: driver::Registration>, + #[pin] + _aux_driver: driver::Registration>, +} + +impl 
InPlaceModule for SampleModule { + fn init(module: &'static kernel::ThisModule) -> impl PinInit { + try_pin_init!(Self { + _pci_driver <- driver::Registration::new(MODULE_NAME, module), + _aux_driver <- driver::Registration::new(MODULE_NAME, module), + }) + } +} + +module! { + type: SampleModule, + name: "rust_driver_auxiliary", + author: "Danilo Krummrich", + description: "Rust auxiliary driver", + license: "GPL v2", +} -- GitLab From c241118b62164786303e6614378feb7d1fda4f54 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:03 +0200 Subject: [PATCH 320/899] drm/mediatek: mtk_hdmi_ddc: Switch to register as module_platform_driver In preparation for adding a driver for the new HDMIv2 IP, and before splitting out the common bits from the HDMI driver, change the mtk_hdmi_ddc driver from being registered from the HDMI driver itself to be a module_platform_driver of its own. With this change, there are no more users of the mtk_hdmi.h header so, while at it, also remove it. 
Reviewed-by: Alexandre Mergnat Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-6-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/Makefile | 6 ++---- drivers/gpu/drm/mediatek/mtk_hdmi.c | 2 -- drivers/gpu/drm/mediatek/mtk_hdmi.h | 13 ------------- drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c | 2 +- 4 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 drivers/gpu/drm/mediatek/mtk_hdmi.h diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index bdd3a062f797a..43afd0a26d145 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -21,10 +21,8 @@ mediatek-drm-y := mtk_crtc.o \ obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o -mediatek-drm-hdmi-objs := mtk_hdmi.o \ - mtk_hdmi_ddc.o - obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mtk_cec.o -obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mediatek-drm-hdmi.o +obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mtk_hdmi.o +obj-$(CONFIG_DRM_MEDIATEK_HDMI) += mtk_hdmi_ddc.o obj-$(CONFIG_DRM_MEDIATEK_DP) += mtk_dp.o diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index f4476a12055bd..a6d719fa9017c 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -31,7 +31,6 @@ #include #include "mtk_cec.h" -#include "mtk_hdmi.h" #include "mtk_hdmi_regs.h" #define NCTS_BYTES 7 @@ -1787,7 +1786,6 @@ static struct platform_driver mtk_hdmi_driver = { }; static struct platform_driver * const mtk_hdmi_drivers[] = { - &mtk_hdmi_ddc_driver, &mtk_hdmi_driver, }; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.h b/drivers/gpu/drm/mediatek/mtk_hdmi.h deleted file mode 100644 index e40bc46519950..0000000000000 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2014 MediaTek Inc. 
- * Author: Jie Qiu - */ -#ifndef _MTK_HDMI_CTRL_H -#define _MTK_HDMI_CTRL_H - -struct platform_driver; - -extern struct platform_driver mtk_hdmi_ddc_driver; - -#endif /* _MTK_HDMI_CTRL_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c index 07db680678444..6358e1af69b49 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -20,7 +20,6 @@ #include #include "mtk_drm_drv.h" -#include "mtk_hdmi.h" #define SIF1_CLOK (288) #define DDC_DDCMCTL0 (0x0) @@ -337,6 +336,7 @@ struct platform_driver mtk_hdmi_ddc_driver = { .of_match_table = mtk_hdmi_ddc_match, }, }; +module_platform_driver(mtk_hdmi_ddc_driver); MODULE_AUTHOR("Jie Qiu "); MODULE_DESCRIPTION("MediaTek HDMI DDC Driver"); -- GitLab From 957cac22a82d33ae2cede1c59dc6844139fd92b3 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:04 +0200 Subject: [PATCH 321/899] drm/mediatek: mtk_hdmi: Convert to module_platform_driver macro Now that all of the mtk_hdmi subdrivers are a platform driver on their own it is possible to remove the custom init/exit functions in this driver and just use the module_platform_driver() macro. While at it, also compress struct of_device_id entries and remove stray commas in mtk_hdmi_driver assignments. 
Reviewed-by: Alexandre Mergnat Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-7-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index a6d719fa9017c..68b1632a2d00e 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1784,25 +1784,7 @@ static struct platform_driver mtk_hdmi_driver = { .pm = &mtk_hdmi_pm_ops, }, }; - -static struct platform_driver * const mtk_hdmi_drivers[] = { - &mtk_hdmi_driver, -}; - -static int __init mtk_hdmitx_init(void) -{ - return platform_register_drivers(mtk_hdmi_drivers, - ARRAY_SIZE(mtk_hdmi_drivers)); -} - -static void __exit mtk_hdmitx_exit(void) -{ - platform_unregister_drivers(mtk_hdmi_drivers, - ARRAY_SIZE(mtk_hdmi_drivers)); -} - -module_init(mtk_hdmitx_init); -module_exit(mtk_hdmitx_exit); +module_platform_driver(mtk_hdmi_driver); MODULE_AUTHOR("Jie Qiu "); MODULE_DESCRIPTION("MediaTek HDMI Driver"); -- GitLab From d6e25b3590a0679bca1a1becc04e3999f1dfd461 Mon Sep 17 00:00:00 2001 From: Guillaume Ranquet Date: Tue, 15 Apr 2025 12:43:05 +0200 Subject: [PATCH 322/899] drm/mediatek: hdmi: Use regmap instead of iomem for main registers In preparation for the addition of a new version of the HDMI IP which will need to share its iospace between multiple subdrivers, and in preparation for moving out the common bits between the two, migrate this driver to fully use regmap. 
Signed-off-by: Guillaume Ranquet Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-8-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 171 +++++++++++----------------- 1 file changed, 65 insertions(+), 106 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 68b1632a2d00e..5a1552f5a519d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -164,7 +164,7 @@ struct mtk_hdmi { bool dvi_mode; struct regmap *sys_regmap; unsigned int sys_offset; - void __iomem *regs; + struct regmap *regs; struct platform_device *audio_pdev; struct hdmi_audio_param aud_param; bool audio_enable; @@ -180,50 +180,10 @@ static inline struct mtk_hdmi *hdmi_ctx_from_bridge(struct drm_bridge *b) return container_of(b, struct mtk_hdmi, bridge); } -static u32 mtk_hdmi_read(struct mtk_hdmi *hdmi, u32 offset) -{ - return readl(hdmi->regs + offset); -} - -static void mtk_hdmi_write(struct mtk_hdmi *hdmi, u32 offset, u32 val) -{ - writel(val, hdmi->regs + offset); -} - -static void mtk_hdmi_clear_bits(struct mtk_hdmi *hdmi, u32 offset, u32 bits) -{ - void __iomem *reg = hdmi->regs + offset; - u32 tmp; - - tmp = readl(reg); - tmp &= ~bits; - writel(tmp, reg); -} - -static void mtk_hdmi_set_bits(struct mtk_hdmi *hdmi, u32 offset, u32 bits) -{ - void __iomem *reg = hdmi->regs + offset; - u32 tmp; - - tmp = readl(reg); - tmp |= bits; - writel(tmp, reg); -} - -static void mtk_hdmi_mask(struct mtk_hdmi *hdmi, u32 offset, u32 val, u32 mask) -{ - void __iomem *reg = hdmi->regs + offset; - u32 tmp; - - tmp = readl(reg); - tmp = (tmp & ~mask) | (val & mask); - writel(tmp, reg); -} - static void mtk_hdmi_hw_vid_black(struct mtk_hdmi *hdmi, bool black) { - mtk_hdmi_mask(hdmi, VIDEO_CFG_4, black ? 
GEN_RGB : NORMAL_PATH, - VIDEO_SOURCE_SEL); + regmap_update_bits(hdmi->regs, VIDEO_CFG_4, + VIDEO_SOURCE_SEL, black ? GEN_RGB : NORMAL_PATH); } static void mtk_hdmi_hw_make_reg_writable(struct mtk_hdmi *hdmi, bool enable) @@ -258,12 +218,12 @@ static void mtk_hdmi_hw_1p4_version_enable(struct mtk_hdmi *hdmi, bool enable) static void mtk_hdmi_hw_aud_mute(struct mtk_hdmi *hdmi) { - mtk_hdmi_set_bits(hdmi, GRL_AUDIO_CFG, AUDIO_ZERO); + regmap_set_bits(hdmi->regs, GRL_AUDIO_CFG, AUDIO_ZERO); } static void mtk_hdmi_hw_aud_unmute(struct mtk_hdmi *hdmi) { - mtk_hdmi_clear_bits(hdmi, GRL_AUDIO_CFG, AUDIO_ZERO); + regmap_clear_bits(hdmi->regs, GRL_AUDIO_CFG, AUDIO_ZERO); } static void mtk_hdmi_hw_reset(struct mtk_hdmi *hdmi) @@ -272,25 +232,25 @@ static void mtk_hdmi_hw_reset(struct mtk_hdmi *hdmi) HDMI_RST, HDMI_RST); regmap_update_bits(hdmi->sys_regmap, hdmi->sys_offset + HDMI_SYS_CFG1C, HDMI_RST, 0); - mtk_hdmi_clear_bits(hdmi, GRL_CFG3, CFG3_CONTROL_PACKET_DELAY); + regmap_clear_bits(hdmi->regs, GRL_CFG3, CFG3_CONTROL_PACKET_DELAY); regmap_update_bits(hdmi->sys_regmap, hdmi->sys_offset + HDMI_SYS_CFG1C, ANLG_ON, ANLG_ON); } static void mtk_hdmi_hw_enable_notice(struct mtk_hdmi *hdmi, bool enable_notice) { - mtk_hdmi_mask(hdmi, GRL_CFG2, enable_notice ? CFG2_NOTICE_EN : 0, - CFG2_NOTICE_EN); + regmap_update_bits(hdmi->regs, GRL_CFG2, CFG2_NOTICE_EN, + enable_notice ? CFG2_NOTICE_EN : 0); } static void mtk_hdmi_hw_write_int_mask(struct mtk_hdmi *hdmi, u32 int_mask) { - mtk_hdmi_write(hdmi, GRL_INT_MASK, int_mask); + regmap_write(hdmi->regs, GRL_INT_MASK, int_mask); } static void mtk_hdmi_hw_enable_dvi_mode(struct mtk_hdmi *hdmi, bool enable) { - mtk_hdmi_mask(hdmi, GRL_CFG1, enable ? CFG1_DVI : 0, CFG1_DVI); + regmap_update_bits(hdmi->regs, GRL_CFG1, CFG1_DVI, enable ? 
CFG1_DVI : 0); } static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, @@ -336,22 +296,22 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, dev_err(hdmi->dev, "Unknown infoframe type %d\n", frame_type); return; } - mtk_hdmi_clear_bits(hdmi, ctrl_reg, ctrl_frame_en); - mtk_hdmi_write(hdmi, GRL_INFOFRM_TYPE, frame_type); - mtk_hdmi_write(hdmi, GRL_INFOFRM_VER, frame_ver); - mtk_hdmi_write(hdmi, GRL_INFOFRM_LNG, frame_len); + regmap_clear_bits(hdmi->regs, ctrl_reg, ctrl_frame_en); + regmap_write(hdmi->regs, GRL_INFOFRM_TYPE, frame_type); + regmap_write(hdmi->regs, GRL_INFOFRM_VER, frame_ver); + regmap_write(hdmi->regs, GRL_INFOFRM_LNG, frame_len); - mtk_hdmi_write(hdmi, GRL_IFM_PORT, checksum); + regmap_write(hdmi->regs, GRL_IFM_PORT, checksum); for (i = 0; i < frame_len; i++) - mtk_hdmi_write(hdmi, GRL_IFM_PORT, frame_data[i]); + regmap_write(hdmi->regs, GRL_IFM_PORT, frame_data[i]); - mtk_hdmi_set_bits(hdmi, ctrl_reg, ctrl_frame_en); + regmap_set_bits(hdmi->regs, ctrl_reg, ctrl_frame_en); } static void mtk_hdmi_hw_send_aud_packet(struct mtk_hdmi *hdmi, bool enable) { - mtk_hdmi_mask(hdmi, GRL_SHIFT_R2, enable ? 0 : AUDIO_PACKET_OFF, - AUDIO_PACKET_OFF); + regmap_update_bits(hdmi->regs, GRL_SHIFT_R2, + AUDIO_PACKET_OFF, enable ? 
0 : AUDIO_PACKET_OFF); } static void mtk_hdmi_hw_config_sys(struct mtk_hdmi *hdmi) @@ -372,44 +332,44 @@ static void mtk_hdmi_hw_set_deep_color_mode(struct mtk_hdmi *hdmi) static void mtk_hdmi_hw_send_av_mute(struct mtk_hdmi *hdmi) { - mtk_hdmi_clear_bits(hdmi, GRL_CFG4, CTRL_AVMUTE); + regmap_clear_bits(hdmi->regs, GRL_CFG4, CTRL_AVMUTE); usleep_range(2000, 4000); - mtk_hdmi_set_bits(hdmi, GRL_CFG4, CTRL_AVMUTE); + regmap_set_bits(hdmi->regs, GRL_CFG4, CTRL_AVMUTE); } static void mtk_hdmi_hw_send_av_unmute(struct mtk_hdmi *hdmi) { - mtk_hdmi_mask(hdmi, GRL_CFG4, CFG4_AV_UNMUTE_EN, - CFG4_AV_UNMUTE_EN | CFG4_AV_UNMUTE_SET); + regmap_update_bits(hdmi->regs, GRL_CFG4, CFG4_AV_UNMUTE_EN | CFG4_AV_UNMUTE_SET, + CFG4_AV_UNMUTE_EN); usleep_range(2000, 4000); - mtk_hdmi_mask(hdmi, GRL_CFG4, CFG4_AV_UNMUTE_SET, - CFG4_AV_UNMUTE_EN | CFG4_AV_UNMUTE_SET); + regmap_update_bits(hdmi->regs, GRL_CFG4, CFG4_AV_UNMUTE_EN | CFG4_AV_UNMUTE_SET, + CFG4_AV_UNMUTE_SET); } static void mtk_hdmi_hw_ncts_enable(struct mtk_hdmi *hdmi, bool on) { - mtk_hdmi_mask(hdmi, GRL_CTS_CTRL, on ? 0 : CTS_CTRL_SOFT, - CTS_CTRL_SOFT); + regmap_update_bits(hdmi->regs, GRL_CTS_CTRL, CTS_CTRL_SOFT, + on ? 0 : CTS_CTRL_SOFT); } static void mtk_hdmi_hw_ncts_auto_write_enable(struct mtk_hdmi *hdmi, bool enable) { - mtk_hdmi_mask(hdmi, GRL_CTS_CTRL, enable ? NCTS_WRI_ANYTIME : 0, - NCTS_WRI_ANYTIME); + regmap_update_bits(hdmi->regs, GRL_CTS_CTRL, NCTS_WRI_ANYTIME, + enable ? 
NCTS_WRI_ANYTIME : 0); } static void mtk_hdmi_hw_msic_setting(struct mtk_hdmi *hdmi, struct drm_display_mode *mode) { - mtk_hdmi_clear_bits(hdmi, GRL_CFG4, CFG4_MHL_MODE); + regmap_clear_bits(hdmi->regs, GRL_CFG4, CFG4_MHL_MODE); if (mode->flags & DRM_MODE_FLAG_INTERLACE && mode->clock == 74250 && mode->vdisplay == 1080) - mtk_hdmi_clear_bits(hdmi, GRL_CFG2, CFG2_MHL_DE_SEL); + regmap_clear_bits(hdmi->regs, GRL_CFG2, CFG2_MHL_DE_SEL); else - mtk_hdmi_set_bits(hdmi, GRL_CFG2, CFG2_MHL_DE_SEL); + regmap_set_bits(hdmi->regs, GRL_CFG2, CFG2_MHL_DE_SEL); } static void mtk_hdmi_hw_aud_set_channel_swap(struct mtk_hdmi *hdmi, @@ -437,7 +397,7 @@ static void mtk_hdmi_hw_aud_set_channel_swap(struct mtk_hdmi *hdmi, swap_bit = LFE_CC_SWAP; break; } - mtk_hdmi_mask(hdmi, GRL_CH_SWAP, swap_bit, 0xff); + regmap_update_bits(hdmi->regs, GRL_CH_SWAP, 0xff, swap_bit); } static void mtk_hdmi_hw_aud_set_bit_num(struct mtk_hdmi *hdmi, @@ -458,7 +418,7 @@ static void mtk_hdmi_hw_aud_set_bit_num(struct mtk_hdmi *hdmi, break; } - mtk_hdmi_mask(hdmi, GRL_AOUT_CFG, val, AOUT_BNUM_SEL_MASK); + regmap_update_bits(hdmi->regs, GRL_AOUT_CFG, AOUT_BNUM_SEL_MASK, val); } static void mtk_hdmi_hw_aud_set_i2s_fmt(struct mtk_hdmi *hdmi, @@ -466,7 +426,7 @@ static void mtk_hdmi_hw_aud_set_i2s_fmt(struct mtk_hdmi *hdmi, { u32 val; - val = mtk_hdmi_read(hdmi, GRL_CFG0); + regmap_read(hdmi->regs, GRL_CFG0, &val); val &= ~(CFG0_W_LENGTH_MASK | CFG0_I2S_MODE_MASK); switch (i2s_fmt) { @@ -490,7 +450,7 @@ static void mtk_hdmi_hw_aud_set_i2s_fmt(struct mtk_hdmi *hdmi, val |= CFG0_I2S_MODE_I2S | CFG0_W_LENGTH_16BIT; break; } - mtk_hdmi_write(hdmi, GRL_CFG0, val); + regmap_write(hdmi->regs, GRL_CFG0, val); } static void mtk_hdmi_hw_audio_config(struct mtk_hdmi *hdmi, bool dst) @@ -499,14 +459,14 @@ static void mtk_hdmi_hw_audio_config(struct mtk_hdmi *hdmi, bool dst) u8 val; /* Disable high bitrate, set DST packet normal/double */ - mtk_hdmi_clear_bits(hdmi, GRL_AOUT_CFG, HIGH_BIT_RATE_PACKET_ALIGN); + 
regmap_clear_bits(hdmi->regs, GRL_AOUT_CFG, HIGH_BIT_RATE_PACKET_ALIGN); if (dst) val = DST_NORMAL_DOUBLE | SACD_DST; else val = 0; - mtk_hdmi_mask(hdmi, GRL_AUDIO_CFG, val, mask); + regmap_update_bits(hdmi->regs, GRL_AUDIO_CFG, mask, val); } static void mtk_hdmi_hw_aud_set_i2s_chan_num(struct mtk_hdmi *hdmi, @@ -547,10 +507,10 @@ static void mtk_hdmi_hw_aud_set_i2s_chan_num(struct mtk_hdmi *hdmi, i2s_uv = I2S_UV_CH_EN(0); } - mtk_hdmi_write(hdmi, GRL_CH_SW0, ch_switch & 0xff); - mtk_hdmi_write(hdmi, GRL_CH_SW1, (ch_switch >> 8) & 0xff); - mtk_hdmi_write(hdmi, GRL_CH_SW2, (ch_switch >> 16) & 0xff); - mtk_hdmi_write(hdmi, GRL_I2S_UV, i2s_uv); + regmap_write(hdmi->regs, GRL_CH_SW0, ch_switch & 0xff); + regmap_write(hdmi->regs, GRL_CH_SW1, (ch_switch >> 8) & 0xff); + regmap_write(hdmi->regs, GRL_CH_SW2, (ch_switch >> 16) & 0xff); + regmap_write(hdmi->regs, GRL_I2S_UV, i2s_uv); } static void mtk_hdmi_hw_aud_set_input_type(struct mtk_hdmi *hdmi, @@ -558,7 +518,7 @@ static void mtk_hdmi_hw_aud_set_input_type(struct mtk_hdmi *hdmi, { u32 val; - val = mtk_hdmi_read(hdmi, GRL_CFG1); + regmap_read(hdmi->regs, GRL_CFG1, &val); if (input_type == HDMI_AUD_INPUT_I2S && (val & CFG1_SPDIF) == CFG1_SPDIF) { val &= ~CFG1_SPDIF; @@ -566,7 +526,7 @@ static void mtk_hdmi_hw_aud_set_input_type(struct mtk_hdmi *hdmi, (val & CFG1_SPDIF) == 0) { val |= CFG1_SPDIF; } - mtk_hdmi_write(hdmi, GRL_CFG1, val); + regmap_write(hdmi->regs, GRL_CFG1, val); } static void mtk_hdmi_hw_aud_set_channel_status(struct mtk_hdmi *hdmi, @@ -575,13 +535,13 @@ static void mtk_hdmi_hw_aud_set_channel_status(struct mtk_hdmi *hdmi, int i; for (i = 0; i < 5; i++) { - mtk_hdmi_write(hdmi, GRL_I2S_C_STA0 + i * 4, channel_status[i]); - mtk_hdmi_write(hdmi, GRL_L_STATUS_0 + i * 4, channel_status[i]); - mtk_hdmi_write(hdmi, GRL_R_STATUS_0 + i * 4, channel_status[i]); + regmap_write(hdmi->regs, GRL_I2S_C_STA0 + i * 4, channel_status[i]); + regmap_write(hdmi->regs, GRL_L_STATUS_0 + i * 4, channel_status[i]); + 
regmap_write(hdmi->regs, GRL_R_STATUS_0 + i * 4, channel_status[i]); } for (; i < 24; i++) { - mtk_hdmi_write(hdmi, GRL_L_STATUS_0 + i * 4, 0); - mtk_hdmi_write(hdmi, GRL_R_STATUS_0 + i * 4, 0); + regmap_write(hdmi->regs, GRL_L_STATUS_0 + i * 4, 0); + regmap_write(hdmi->regs, GRL_R_STATUS_0 + i * 4, 0); } } @@ -589,13 +549,13 @@ static void mtk_hdmi_hw_aud_src_reenable(struct mtk_hdmi *hdmi) { u32 val; - val = mtk_hdmi_read(hdmi, GRL_MIX_CTRL); + regmap_read(hdmi->regs, GRL_MIX_CTRL, &val); if (val & MIX_CTRL_SRC_EN) { val &= ~MIX_CTRL_SRC_EN; - mtk_hdmi_write(hdmi, GRL_MIX_CTRL, val); + regmap_write(hdmi->regs, GRL_MIX_CTRL, val); usleep_range(255, 512); val |= MIX_CTRL_SRC_EN; - mtk_hdmi_write(hdmi, GRL_MIX_CTRL, val); + regmap_write(hdmi->regs, GRL_MIX_CTRL, val); } } @@ -603,10 +563,10 @@ static void mtk_hdmi_hw_aud_src_disable(struct mtk_hdmi *hdmi) { u32 val; - val = mtk_hdmi_read(hdmi, GRL_MIX_CTRL); + regmap_read(hdmi->regs, GRL_MIX_CTRL, &val); val &= ~MIX_CTRL_SRC_EN; - mtk_hdmi_write(hdmi, GRL_MIX_CTRL, val); - mtk_hdmi_write(hdmi, GRL_SHIFT_L1, 0x00); + regmap_write(hdmi->regs, GRL_MIX_CTRL, val); + regmap_write(hdmi->regs, GRL_SHIFT_L1, 0x00); } static void mtk_hdmi_hw_aud_set_mclk(struct mtk_hdmi *hdmi, @@ -614,7 +574,7 @@ static void mtk_hdmi_hw_aud_set_mclk(struct mtk_hdmi *hdmi, { u32 val; - val = mtk_hdmi_read(hdmi, GRL_CFG5); + regmap_read(hdmi->regs, GRL_CFG5, &val); val &= CFG5_CD_RATIO_MASK; switch (mclk) { @@ -637,7 +597,7 @@ static void mtk_hdmi_hw_aud_set_mclk(struct mtk_hdmi *hdmi, val |= CFG5_FS256; break; } - mtk_hdmi_write(hdmi, GRL_CFG5, val); + regmap_write(hdmi->regs, GRL_CFG5, val); } struct hdmi_acr_n { @@ -721,9 +681,9 @@ static void do_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, unsigned int n, unsigned char val[NCTS_BYTES]; int i; - mtk_hdmi_write(hdmi, GRL_NCTS, 0); - mtk_hdmi_write(hdmi, GRL_NCTS, 0); - mtk_hdmi_write(hdmi, GRL_NCTS, 0); + regmap_write(hdmi->regs, GRL_NCTS, 0); + regmap_write(hdmi->regs, GRL_NCTS, 0); + 
regmap_write(hdmi->regs, GRL_NCTS, 0); memset(val, 0, sizeof(val)); val[0] = (cts >> 24) & 0xff; @@ -736,7 +696,7 @@ static void do_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, unsigned int n, val[6] = n & 0xff; for (i = 0; i < NCTS_BYTES; i++) - mtk_hdmi_write(hdmi, GRL_NCTS, val[i]); + regmap_write(hdmi->regs, GRL_NCTS, val[i]); } static void mtk_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, @@ -751,8 +711,7 @@ static void mtk_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, dev_dbg(hdmi->dev, "%s: sample_rate=%u, clock=%d, cts=%u, n=%u\n", __func__, sample_rate, clock, n, cts); - mtk_hdmi_mask(hdmi, DUMMY_304, AUDIO_I2S_NCTS_SEL_64, - AUDIO_I2S_NCTS_SEL); + regmap_update_bits(hdmi->regs, DUMMY_304, AUDIO_I2S_NCTS_SEL, AUDIO_I2S_NCTS_SEL_64); do_hdmi_hw_aud_set_ncts(hdmi, n, cts); } @@ -872,7 +831,7 @@ static void mtk_hdmi_aud_set_input(struct mtk_hdmi *hdmi) bool dst; mtk_hdmi_hw_aud_set_channel_swap(hdmi, HDMI_AUD_SWAP_LFE_CC); - mtk_hdmi_set_bits(hdmi, GRL_MIX_CTRL, MIX_CTRL_FLAT); + regmap_set_bits(hdmi->regs, GRL_MIX_CTRL, MIX_CTRL_FLAT); if (hdmi->aud_param.aud_input_type == HDMI_AUD_INPUT_SPDIF && hdmi->aud_param.aud_codec == HDMI_AUDIO_CODING_TYPE_DST) { @@ -904,7 +863,7 @@ static int mtk_hdmi_aud_set_src(struct mtk_hdmi *hdmi, mtk_hdmi_hw_ncts_enable(hdmi, false); mtk_hdmi_hw_aud_src_disable(hdmi); - mtk_hdmi_clear_bits(hdmi, GRL_CFG2, CFG2_ACLK_INV); + regmap_clear_bits(hdmi->regs, GRL_CFG2, CFG2_ACLK_INV); if (hdmi->aud_param.aud_input_type == HDMI_AUD_INPUT_I2S) { switch (sample_rate) { @@ -1455,7 +1414,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, } hdmi->sys_regmap = regmap; - hdmi->regs = devm_platform_ioremap_resource(pdev, 0); + hdmi->regs = device_node_to_regmap(dev->of_node); if (IS_ERR(hdmi->regs)) { ret = PTR_ERR(hdmi->regs); goto put_device; -- GitLab From b506ff3316ae0545961417c76131b084d2c9bc6c Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:06 +0200 Subject: [PATCH 323/899] drm/mediatek: 
mtk_hdmi: Disgregate function mtk_hdmi_audio_set_param() As a cleanup, and in preparation for splitting common bits of this driver, disgregate the code in function mtk_hdmi_audio_set_param() to the beginning and end of function mtk_hdmi_audio_hw_params(). In a later commit, the hw_params callback function will also be disgregated so that the code will get two functions: one that performs the generic hdmi_audio_param copy, and one that performs IP specific setup, both of which will be called in the callback, allowing all of the non IP version specific code to get moved in a common file. Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-9-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 5a1552f5a519d..be4b34d83db1f 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1045,20 +1045,6 @@ static void mtk_hdmi_audio_disable(struct mtk_hdmi *hdmi) hdmi->audio_enable = false; } -static int mtk_hdmi_audio_set_param(struct mtk_hdmi *hdmi, - struct hdmi_audio_param *param) -{ - if (!hdmi->audio_enable) { - dev_err(hdmi->dev, "hdmi audio is in disable state!\n"); - return -EINVAL; - } - dev_dbg(hdmi->dev, "codec:%d, input:%d, channel:%d, fs:%d\n", - param->aud_codec, param->aud_input_type, - param->aud_input_chan_type, param->codec_params.sample_rate); - memcpy(&hdmi->aud_param, param, sizeof(*param)); - return mtk_hdmi_aud_output_config(hdmi, &hdmi->mode); -} - static int mtk_hdmi_output_set_display_mode(struct mtk_hdmi *hdmi, struct drm_display_mode *mode) { @@ -1472,6 +1458,11 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, struct hdmi_audio_param hdmi_params; unsigned int chan = 
params->cea.channels; + if (!hdmi->audio_enable) { + dev_err(hdmi->dev, "hdmi audio is in disable state!\n"); + return -EINVAL; + } + dev_dbg(hdmi->dev, "%s: %u Hz, %d bit, %d channels\n", __func__, params->sample_rate, params->sample_width, chan); @@ -1532,8 +1523,13 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, memcpy(&hdmi_params.codec_params, params, sizeof(hdmi_params.codec_params)); + memcpy(&hdmi->aud_param, &hdmi_params, sizeof(hdmi_params)); + + dev_dbg(hdmi->dev, "codec:%d, input:%d, channel:%d, fs:%d\n", + hdmi_params.aud_codec, hdmi_params.aud_input_type, + hdmi_params.aud_input_chan_type, hdmi_params.codec_params.sample_rate); - mtk_hdmi_audio_set_param(hdmi, &hdmi_params); + mtk_hdmi_aud_output_config(hdmi, &hdmi->mode); return 0; } -- GitLab From 332de7d7c23f41463ecf53b05b579763e29e6355 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:07 +0200 Subject: [PATCH 324/899] drm/mediatek: mtk_hdmi: Move audio params selection to new function In preparation for splitting common bits of this driver, move the audio params (codec, sample rate/size, input type, i2s format, etc) selection to a new function called mtk_hdmi_audio_params(). While at it, also rename "hdmi_params" to "aud_params" both to match the mtk_hdmi struct member name and to clarify that those parameters are for HDMI Audio and not for HDMI Video configuration. 
Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-10-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 63 ++++++++++++++++------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index be4b34d83db1f..697508edfa41f 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1450,19 +1450,13 @@ put_device: * HDMI audio codec callbacks */ -static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, - struct hdmi_codec_daifmt *daifmt, - struct hdmi_codec_params *params) +static int mtk_hdmi_audio_params(struct mtk_hdmi *hdmi, + struct hdmi_codec_daifmt *daifmt, + struct hdmi_codec_params *params) { - struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - struct hdmi_audio_param hdmi_params; + struct hdmi_audio_param aud_params = { 0 }; unsigned int chan = params->cea.channels; - if (!hdmi->audio_enable) { - dev_err(hdmi->dev, "hdmi audio is in disable state!\n"); - return -EINVAL; - } - dev_dbg(hdmi->dev, "%s: %u Hz, %d bit, %d channels\n", __func__, params->sample_rate, params->sample_width, chan); @@ -1471,16 +1465,16 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, switch (chan) { case 2: - hdmi_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_2_0; + aud_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_2_0; break; case 4: - hdmi_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_4_0; + aud_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_4_0; break; case 6: - hdmi_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_5_1; + aud_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_5_1; break; case 8: - hdmi_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_7_1; + aud_params.aud_input_chan_type = HDMI_AUD_CHAN_TYPE_7_1; break; default: dev_err(hdmi->dev, "channel[%d] not supported!\n", 
chan); @@ -1504,31 +1498,44 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, switch (daifmt->fmt) { case HDMI_I2S: - hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; - hdmi_params.aud_input_type = HDMI_AUD_INPUT_I2S; - hdmi_params.aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; - hdmi_params.aud_mclk = HDMI_AUD_MCLK_128FS; + aud_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; + aud_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; + aud_params.aud_input_type = HDMI_AUD_INPUT_I2S; + aud_params.aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; + aud_params.aud_mclk = HDMI_AUD_MCLK_128FS; break; case HDMI_SPDIF: - hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; - hdmi_params.aud_input_type = HDMI_AUD_INPUT_SPDIF; + aud_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; + aud_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; + aud_params.aud_input_type = HDMI_AUD_INPUT_SPDIF; break; default: dev_err(hdmi->dev, "%s: Invalid DAI format %d\n", __func__, daifmt->fmt); return -EINVAL; } - - memcpy(&hdmi_params.codec_params, params, - sizeof(hdmi_params.codec_params)); - memcpy(&hdmi->aud_param, &hdmi_params, sizeof(hdmi_params)); + memcpy(&aud_params.codec_params, params, sizeof(aud_params.codec_params)); + memcpy(&hdmi->aud_param, &aud_params, sizeof(aud_params)); dev_dbg(hdmi->dev, "codec:%d, input:%d, channel:%d, fs:%d\n", - hdmi_params.aud_codec, hdmi_params.aud_input_type, - hdmi_params.aud_input_chan_type, hdmi_params.codec_params.sample_rate); + aud_params.aud_codec, aud_params.aud_input_type, + aud_params.aud_input_chan_type, aud_params.codec_params.sample_rate); + + return 0; +} + +static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, + struct hdmi_codec_daifmt *daifmt, + struct hdmi_codec_params *params) +{ + struct mtk_hdmi *hdmi = dev_get_drvdata(dev); + + if (!hdmi->audio_enable) { + dev_err(hdmi->dev, "hdmi audio is in 
disable state!\n"); + return -EINVAL; + } + mtk_hdmi_audio_params(hdmi, daifmt, params); mtk_hdmi_aud_output_config(hdmi, &hdmi->mode); return 0; -- GitLab From c92493e3074478c9c9dc4b5ba2f59cf5e4836bb9 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:08 +0200 Subject: [PATCH 325/899] drm/mediatek: mtk_hdmi: Move plugged_cb/codec_dev setting to new function In preparation for splitting common bits of this driver, move the mutex-protected cable plugged callback and codec device setting to a new function called mtk_hdmi_audio_set_plugged_cb(). Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-11-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 697508edfa41f..60ec16f6827ff 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1582,17 +1582,22 @@ static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, return 0; } -static int mtk_hdmi_audio_hook_plugged_cb(struct device *dev, void *data, - hdmi_codec_plugged_cb fn, +static void mtk_hdmi_audio_set_plugged_cb(struct mtk_hdmi *hdmi, hdmi_codec_plugged_cb fn, struct device *codec_dev) { - struct mtk_hdmi *hdmi = data; - mutex_lock(&hdmi->update_plugged_status_lock); hdmi->plugged_cb = fn; hdmi->codec_dev = codec_dev; mutex_unlock(&hdmi->update_plugged_status_lock); +} + +static int mtk_hdmi_audio_hook_plugged_cb(struct device *dev, void *data, + hdmi_codec_plugged_cb fn, + struct device *codec_dev) +{ + struct mtk_hdmi *hdmi = data; + mtk_hdmi_audio_set_plugged_cb(hdmi, fn, codec_dev); mtk_hdmi_update_plugged_status(hdmi); return 0; -- GitLab From 7837702058a16676cf0b0e0a83babe3cb0552cb0 Mon Sep 17 00:00:00 2001 From: 
AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:09 +0200 Subject: [PATCH 326/899] drm/mediatek: mtk_hdmi: Move N/CTS setting to new function In preparation for splitting common bits of this driver, moving the hdmi_rec_n_table struct array, and the hdmi_recommended_n(), hdmi_mode_clock_to_hz(), hdmi_expected_cts() functions, add one function called mtk_hdmi_get_ncts() that sets both N and CTS in a single call. Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-12-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 60ec16f6827ff..0933b22b81ff4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -675,6 +675,13 @@ static unsigned int hdmi_expected_cts(unsigned int audio_sample_rate, 128 * audio_sample_rate); } +static void mtk_hdmi_get_ncts(unsigned int sample_rate, unsigned int clock, + unsigned int *n, unsigned int *cts) +{ + *n = hdmi_recommended_n(sample_rate, clock); + *cts = hdmi_expected_cts(sample_rate, clock, *n); +} + static void do_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, unsigned int n, unsigned int cts) { @@ -705,8 +712,7 @@ static void mtk_hdmi_hw_aud_set_ncts(struct mtk_hdmi *hdmi, { unsigned int n, cts; - n = hdmi_recommended_n(sample_rate, clock); - cts = hdmi_expected_cts(sample_rate, clock, n); + mtk_hdmi_get_ncts(sample_rate, clock, &n, &cts); dev_dbg(hdmi->dev, "%s: sample_rate=%u, clock=%d, cts=%u, n=%u\n", __func__, sample_rate, clock, n, cts); -- GitLab From 03c7aea33cb5860b4468c65a1715be76b54fb882 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:10 +0200 Subject: [PATCH 327/899] drm/mediatek: mtk_hdmi: Use dev_err_probe() in mtk_hdmi_dt_parse_pdata() 
Change error prints to use dev_err_probe() instead of dev_err() where possible in function mtk_hdmi_dt_parse_pdata(), used only during device probe. While at it, also beautify some prints. Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-13-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 0933b22b81ff4..6c65d256e4d39 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1368,19 +1368,13 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, int ret; ret = mtk_hdmi_get_all_clk(hdmi, np); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "Failed to get clocks: %d\n", ret); - - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to get clocks\n"); /* The CEC module handles HDMI hotplug detection */ cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec"); - if (!cec_np) { - dev_err(dev, "Failed to find CEC node\n"); - return -EINVAL; - } + if (!cec_np) + return dev_err_probe(dev, -EINVAL, "Failed to find CEC node\n"); cec_pdev = of_find_device_by_node(cec_np); if (!cec_pdev) { @@ -1429,20 +1423,16 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, } i2c_np = of_parse_phandle(remote, "ddc-i2c-bus", 0); + of_node_put(remote); if (!i2c_np) { - dev_err(dev, "Failed to find ddc-i2c-bus node in %pOF\n", - remote); - of_node_put(remote); - ret = -EINVAL; + ret = dev_err_probe(dev, -EINVAL, "No ddc-i2c-bus in connector\n"); goto put_device; } - of_node_put(remote); hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np); of_node_put(i2c_np); if (!hdmi->ddc_adpt) { - dev_err(dev, "Failed to get ddc i2c adapter by node\n"); - ret = -EINVAL; + ret = dev_err_probe(dev, 
-EINVAL, "Failed to get ddc i2c adapter by node\n"); goto put_device; } -- GitLab From 7485be967f7f39e931967b33c71ece88a573fc90 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:11 +0200 Subject: [PATCH 328/899] drm/mediatek: mtk_hdmi: Move CEC device parsing in new function Move the CEC device parsing logic to a new function called mtk_hdmi_get_cec_dev(), and move the parsing action to the end of mtk_hdmi_dt_parse_pdata(), allowing to remove gotos in this function, reducing code size and improving readability. Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-14-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 72 ++++++++++++++--------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 6c65d256e4d39..cba1d3172472b 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1357,14 +1357,10 @@ static const struct drm_bridge_funcs mtk_hdmi_bridge_funcs = { .edid_read = mtk_hdmi_bridge_edid_read, }; -static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, - struct platform_device *pdev) +static int mtk_hdmi_get_cec_dev(struct mtk_hdmi *hdmi, struct device *dev, struct device_node *np) { - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - struct device_node *cec_np, *remote, *i2c_np; struct platform_device *cec_pdev; - struct regmap *regmap; + struct device_node *cec_np; int ret; ret = mtk_hdmi_get_all_clk(hdmi, np); @@ -1384,62 +1380,66 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, return -EPROBE_DEFER; } of_node_put(cec_np); - hdmi->cec_dev = &cec_pdev->dev; /* * The mediatek,syscon-hdmi property contains a phandle link to the * MMSYS_CONFIG device and the register offset of the HDMI_SYS_CFG * registers 
it contains. */ - regmap = syscon_regmap_lookup_by_phandle_args(np, "mediatek,syscon-hdmi", - 1, &hdmi->sys_offset); - if (IS_ERR(regmap)) { - ret = dev_err_probe(dev, PTR_ERR(regmap), - "Failed to get system configuration registers\n"); - goto put_device; - } - hdmi->sys_regmap = regmap; + hdmi->sys_regmap = syscon_regmap_lookup_by_phandle_args(np, "mediatek,syscon-hdmi", + 1, &hdmi->sys_offset); + if (IS_ERR(hdmi->sys_regmap)) + return dev_err_probe(dev, PTR_ERR(hdmi->sys_regmap), + "Failed to get system configuration registers\n"); + + hdmi->cec_dev = &cec_pdev->dev; + return 0; +} + +static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, + struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct device_node *remote, *i2c_np; + int ret; + + ret = mtk_hdmi_get_all_clk(hdmi, np); + if (ret) + return dev_err_probe(dev, ret, "Failed to get clocks\n"); hdmi->regs = device_node_to_regmap(dev->of_node); - if (IS_ERR(hdmi->regs)) { - ret = PTR_ERR(hdmi->regs); - goto put_device; - } + if (IS_ERR(hdmi->regs)) + return PTR_ERR(hdmi->regs); remote = of_graph_get_remote_node(np, 1, 0); - if (!remote) { - ret = -EINVAL; - goto put_device; - } + if (!remote) + return -EINVAL; if (!of_device_is_compatible(remote, "hdmi-connector")) { hdmi->next_bridge = of_drm_find_bridge(remote); if (!hdmi->next_bridge) { dev_err(dev, "Waiting for external bridge\n"); of_node_put(remote); - ret = -EPROBE_DEFER; - goto put_device; + return -EPROBE_DEFER; } } i2c_np = of_parse_phandle(remote, "ddc-i2c-bus", 0); of_node_put(remote); - if (!i2c_np) { - ret = dev_err_probe(dev, -EINVAL, "No ddc-i2c-bus in connector\n"); - goto put_device; - } + if (!i2c_np) + return dev_err_probe(dev, -EINVAL, "No ddc-i2c-bus in connector\n"); hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np); of_node_put(i2c_np); - if (!hdmi->ddc_adpt) { - ret = dev_err_probe(dev, -EINVAL, "Failed to get ddc i2c adapter by node\n"); - goto put_device; - } + if 
(!hdmi->ddc_adpt) + return dev_err_probe(dev, -EINVAL, "Failed to get ddc i2c adapter by node\n"); + + ret = mtk_hdmi_get_cec_dev(hdmi, dev, np); + if (ret) + return ret; return 0; -put_device: - put_device(hdmi->cec_dev); - return ret; } /* -- GitLab From 26c691a6620069894ba3fde9147cf49d340870b3 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 15 Apr 2025 12:43:12 +0200 Subject: [PATCH 329/899] drm/mediatek: mtk_hdmi: Move output init to mtk_hdmi_register_audio_driver() In preparation for moving the common bits of this driver, merge the contents of mtk_hdmi_output_init in mtk_hdmi_register_audio_driver function to aggregate all of the initial audio setup together in the same function and to make it clear that all of the setup that is performed in mtk_hdmi_output_init is specifically related only to audio and not video. Reviewed-by: CK Hu Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250415104321.51149-15-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index cba1d3172472b..1a0c8d092b6b8 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1025,20 +1025,6 @@ static int mtk_hdmi_setup_vendor_specific_infoframe(struct mtk_hdmi *hdmi, return 0; } -static int mtk_hdmi_output_init(struct mtk_hdmi *hdmi) -{ - struct hdmi_audio_param *aud_param = &hdmi->aud_param; - - aud_param->aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - aud_param->aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; - aud_param->aud_input_type = HDMI_AUD_INPUT_I2S; - aud_param->aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; - aud_param->aud_mclk = HDMI_AUD_MCLK_128FS; - aud_param->aud_input_chan_type = HDMI_AUD_CHAN_TYPE_2_0; - - return 0; -} - static void 
mtk_hdmi_audio_enable(struct mtk_hdmi *hdmi) { mtk_hdmi_hw_send_aud_packet(hdmi, true); @@ -1616,6 +1602,7 @@ static void mtk_hdmi_unregister_audio_driver(void *data) static int mtk_hdmi_register_audio_driver(struct device *dev) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); + struct hdmi_audio_param *aud_param = &hdmi->aud_param; struct hdmi_codec_pdata codec_data = { .ops = &mtk_hdmi_audio_codec_ops, .max_i2s_channels = 2, @@ -1625,6 +1612,13 @@ static int mtk_hdmi_register_audio_driver(struct device *dev) }; int ret; + aud_param->aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; + aud_param->aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; + aud_param->aud_input_type = HDMI_AUD_INPUT_I2S; + aud_param->aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; + aud_param->aud_mclk = HDMI_AUD_MCLK_128FS; + aud_param->aud_input_chan_type = HDMI_AUD_CHAN_TYPE_2_0; + hdmi->audio_pdev = platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO, @@ -1666,11 +1660,6 @@ static int mtk_hdmi_probe(struct platform_device *pdev) mutex_init(&hdmi->update_plugged_status_lock); platform_set_drvdata(pdev, hdmi); - ret = mtk_hdmi_output_init(hdmi); - if (ret) - return dev_err_probe(dev, ret, - "Failed to initialize hdmi output\n"); - ret = mtk_hdmi_register_audio_driver(dev); if (ret) return dev_err_probe(dev, ret, -- GitLab From 38feab2deac8732247cb54d05a26ca07b5c4d0e0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:16:53 -0500 Subject: [PATCH 330/899] drm/amdgpu/userq/mes: remove unused header This is unused so remove it. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index b9705cbec74d2..7ae4759b5b569 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -25,7 +25,6 @@ #include "amdgpu_gfx.h" #include "mes_userqueue.h" #include "amdgpu_userq_fence.h" -#include "v11_structs.h" #include #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE -- GitLab From 51a9ea455115fc75ef703dbc2ac04ca59b47b174 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Apr 2025 12:29:37 -0400 Subject: [PATCH 331/899] drm/amdgpu/userq: rename suspend/resume callbacks Rename to map and unmap to better align with what is happening at the firmware level and remove the extra level of indirection in the MES userq code. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 8 ++-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 46 ++++++------------- 3 files changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index ecd49cf15b2a9..2be1e54b78997 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -432,11 +432,11 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { userq_funcs = adev->userq_funcs[queue->queue_type]; - ret = userq_funcs->resume(uq_mgr, queue); + ret = userq_funcs->map(uq_mgr, queue); } if (ret) - DRM_ERROR("Failed to resume all the queue\n"); + DRM_ERROR("Failed to map all the queues\n"); return ret; } @@ -587,14 +587,14 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) int
queue_id; int ret = 0; - /* Try to suspend all the queues in this process ctx */ + /* Try to unmap all the queues in this process ctx */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { userq_funcs = adev->userq_funcs[queue->queue_type]; - ret += userq_funcs->suspend(uq_mgr, queue); + ret += userq_funcs->unmap(uq_mgr, queue); } if (ret) - DRM_ERROR("Couldn't suspend all the queues\n"); + DRM_ERROR("Couldn't unmap all the queues\n"); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index ec1a4ca6f6321..a0cf6978f2ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ -64,10 +64,10 @@ struct amdgpu_userq_funcs { struct amdgpu_usermode_queue *queue); void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *uq); - int (*suspend)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); - int (*resume)(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue); + int (*unmap)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*map)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); }; /* Usermode queues for gfx */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 7ae4759b5b569..fe6fc3e0a3204 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -98,14 +98,17 @@ mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, } static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct amdgpu_mqd_prop *userq_props) + struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct amdgpu_mqd_prop *userq_props = queue->userq_prop; struct mes_add_queue_input queue_input; int r; + if (queue->queue_active) 
+ return 0; + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_va_start = 0; @@ -143,14 +146,17 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct mes_remove_queue_input queue_input; struct amdgpu_userq_obj *ctx = &queue->fw_obj; int r; + if (!queue->queue_active) + return 0; + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); queue_input.doorbell_offset = queue->doorbell_index; queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; @@ -161,6 +167,7 @@ static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, if (r) DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); queue->queue_active = false; + return r; } static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, @@ -314,7 +321,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, } /* Map userqueue into FW using MES */ - r = mes_userq_map(uq_mgr, queue, userq_props); + r = mes_userq_map(uq_mgr, queue); if (r) { DRM_ERROR("Failed to init MQD\n"); goto free_ctx; @@ -354,34 +361,9 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } -static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - if (queue->queue_active) - mes_userq_unmap(uq_mgr, queue); - - return 0; -} - -static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) -{ - int ret; - - if (queue->queue_active) - return 0; - - ret = mes_userq_map(uq_mgr, queue, queue->userq_prop); - if (ret) { - DRM_ERROR("Failed to resume queue\n"); - return ret; - } - return 0; -} - const struct amdgpu_userq_funcs userq_mes_funcs = { .mqd_create = 
mes_userq_mqd_create, .mqd_destroy = mes_userq_mqd_destroy, - .suspend = mes_userq_suspend, - .resume = mes_userq_resume, + .unmap = mes_userq_unmap, + .map = mes_userq_map, }; -- GitLab From b0db33c8c50f5e89557d4b11da94820677f0c9a2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Feb 2025 14:47:00 -0500 Subject: [PATCH 332/899] drm/amdgpu/userq: rework front end call sequence Split out the queue map from the mqd create call and split out the queue unmap from the mqd destroy call. This splits the queue setup and teardown with the actual enablement in the firmware. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 17 +++++++++++++++-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 10 ---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2be1e54b78997..c3873041ec942 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -263,7 +263,10 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) { struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; + int r; cancel_delayed_work(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); @@ -274,12 +277,13 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) mutex_unlock(&uq_mgr->userq_mutex); return -EINVAL; } - + uq_funcs = adev->userq_funcs[queue->queue_type]; + r = uq_funcs->unmap(uq_mgr, queue); amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); - return 0; + return r; } static int @@ -364,6 +368,15 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = 
-ENOMEM; goto unlock; } + + r = uq_funcs->map(uq_mgr, queue); + if (r) { + DRM_ERROR("Failed to map Queue\n"); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + goto unlock; + } + args->out.queue_id = qid; unlock: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index fe6fc3e0a3204..e3c3fc160b799 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -320,13 +320,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_ctx; } - /* Map userqueue into FW using MES */ - r = mes_userq_map(uq_mgr, queue); - if (r) { - DRM_ERROR("Failed to init MQD\n"); - goto free_ctx; - } - return 0; free_ctx: @@ -350,9 +343,6 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, { struct amdgpu_device *adev = uq_mgr->adev; - if (queue->queue_active) - mes_userq_unmap(uq_mgr, queue); - amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); -- GitLab From edc762a51c7181d6fe1e0837e2eb69afb406f98e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Apr 2025 13:49:47 -0400 Subject: [PATCH 333/899] drm/amdgpu/userq: move some code around Move some userq fence handling code into amdgpu_userq_fence.c. This matches the other code in that file. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 26 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 26 ------------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index a4953d668972a..ee73d7846c2ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -120,6 +120,32 @@ free_fence_drv: return r; } +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long index; + + if (xa_empty(xa)) + return; + + xa_lock(xa); + xa_for_each(xa, index, fence_drv) { + __xa_erase(xa, index); + amdgpu_userq_fence_driver_put(fence_drv); + } + + xa_unlock(xa); +} + +void +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) +{ + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); + xa_destroy(&userq->fence_drv_xa); + /* Drop the fence_drv reference held by user queue */ + amdgpu_userq_fence_driver_put(userq->fence_drv); +} + void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index f0a91cc028808..7bbae238cca0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -66,6 +66,7 @@ void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); void 
amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index c3873041ec942..79f4df4255c50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -28,32 +28,6 @@ #include "amdgpu_userqueue.h" #include "amdgpu_userq_fence.h" -static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) -{ - struct amdgpu_userq_fence_driver *fence_drv; - unsigned long index; - - if (xa_empty(xa)) - return; - - xa_lock(xa); - xa_for_each(xa, index, fence_drv) { - __xa_erase(xa, index); - amdgpu_userq_fence_driver_put(fence_drv); - } - - xa_unlock(xa); -} - -static void -amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) -{ - amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); - xa_destroy(&userq->fence_drv_xa); - /* Drop the fence_drv reference held by user queue */ - amdgpu_userq_fence_driver_put(userq->fence_drv); -} - static void amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, -- GitLab From c0bbf64870e71eb8d6f6e2bcef3d84ba86baf32c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Apr 2025 13:54:15 -0400 Subject: [PATCH 334/899] drm/amdgpu/userq: properly clean up userq fence driver on failure If userq creation fails, we need to properly unwind and free the user queue fence driver. 
v2: free idr as well (Sunil) Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 79f4df4255c50..41a095c12f956 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -330,6 +330,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); if (r) { DRM_ERROR("Failed to create Queue\n"); + amdgpu_userq_fence_driver_free(queue); kfree(queue); goto unlock; } @@ -337,6 +338,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); if (qid < 0) { DRM_ERROR("Failed to allocate a queue id\n"); + amdgpu_userq_fence_driver_free(queue); uq_funcs->mqd_destroy(uq_mgr, queue); kfree(queue); r = -ENOMEM; @@ -346,6 +348,8 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = uq_funcs->map(uq_mgr, queue); if (r) { DRM_ERROR("Failed to map Queue\n"); + idr_remove(&uq_mgr->userq_idr, qid); + amdgpu_userq_fence_driver_free(queue); uq_funcs->mqd_destroy(uq_mgr, queue); kfree(queue); goto unlock; -- GitLab From 73e12e98ec0c5657c4a0c99c96bc4c608813365d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 16:31:40 -0500 Subject: [PATCH 335/899] drm/amdgpu/userq: add suspend and resume helpers Add helpers to unmap and map user queues on suspend and resume. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 39 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 3 ++ 2 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 41a095c12f956..f5c2489baae43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -681,3 +681,42 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } + +int amdgpu_userq_suspend(struct amdgpu_device *adev) +{ + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; + ret |= userq_funcs->unmap(uqm, queue); + } + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_resume(struct amdgpu_device *adev) +{ + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; + ret |= userq_funcs->map(uqm, queue); + } + } + mutex_unlock(&adev->userq_mutex); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index a0cf6978f2ba8..381b9c6f0573d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ 
-111,4 +111,7 @@ uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_db_info *db_info, struct drm_file *filp); +int amdgpu_userq_suspend(struct amdgpu_device *adev); +int amdgpu_userq_resume(struct amdgpu_device *adev); + #endif -- GitLab From c2c722217af4d2d17de8665968975a70a08046ea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Feb 2025 12:16:34 -0500 Subject: [PATCH 336/899] drm/amdgpu/userq: handle system suspend and resume Unmap user queues on suspend and map them on resume. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 475bcd2a8a315..e966aefc2b0f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3513,6 +3513,9 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); amdgpu_amdkfd_suspend(adev, false); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + amdgpu_userq_suspend(adev); +#endif /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -5081,8 +5084,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) + if (!adev->in_s0ix) { amdgpu_amdkfd_suspend(adev, adev->in_runpm); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + amdgpu_userq_suspend(adev); +#endif + } r = amdgpu_device_evict_resources(adev); if (r) @@ -5149,6 +5156,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + r = amdgpu_userq_resume(adev); + if (r) + goto exit; +#endif } r = amdgpu_device_ip_late_init(adev); -- GitLab From 
94fc88f680f94e1c41eb994e6c84873696ddcc6e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Apr 2025 13:17:08 -0400 Subject: [PATCH 337/899] drm/amdgpu: don't swallow errors in amdgpu_userqueue_resume_all() since we loop through the queues |= the errors. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index f5c2489baae43..aa7222137c31d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -423,7 +423,7 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { userq_funcs = adev->userq_funcs[queue->queue_type]; - ret = userq_funcs->map(uq_mgr, queue); + ret |= userq_funcs->map(uq_mgr, queue); } if (ret) -- GitLab From 29891842154d7ebca97a94b0d5aaae94e560f61c Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Fri, 11 Apr 2025 13:01:19 +0800 Subject: [PATCH 338/899] drm/amdgpu: Add the new sdma function pointers for amdgpu_sdma.h This patch introduces new function pointers in the amdgpu_sdma structure to handle queue stop, start and soft reset operations. These will replace the older callback mechanism. The new functions are: - stop_kernel_queue: Stops a specific SDMA queue - start_kernel_queue: Starts/Restores a specific SDMA queue - soft_reset_kernel_queue: Performs soft reset on a specific SDMA queue v2: Update stop_queue/start_queue function parameters to use ring pointer instead of device/instance (Christian) v3: move stop_queue/start_queue to struct amdgpu_sdma_instance and rename them.
(Alex) v4: rework the ordering a bit (Alex) Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 0b1fbcf0b4d0e..a6c8f07a0da4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,12 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_funcs { + int (*stop_kernel_queue)(struct amdgpu_ring *ring); + int (*start_kernel_queue)(struct amdgpu_ring *ring); + int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id); +}; + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -68,7 +74,7 @@ struct amdgpu_sdma_instance { /* track guilty state of GFX and PAGE queues */ bool gfx_guilty; bool page_guilty; - + const struct amdgpu_sdma_funcs *funcs; }; enum amdgpu_sdma_ras_memory_id { -- GitLab From ce1d40196de5b02ac643bbf36551cf96d0e55852 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Mon, 14 Apr 2025 15:25:16 +0800 Subject: [PATCH 339/899] drm/amdgpu/sdma_v4: Register the new sdma function pointers Register stop/start/soft_reset queue functions for sdma v4_4_2. 
Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 673ecd208c6d8..83ebf437802af 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -108,6 +108,8 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -1333,6 +1335,11 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v4_4_2_stop_queue, + .start_kernel_queue = &sdma_v4_4_2_restore_queue, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1352,7 +1359,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); sdma_v4_4_2_set_engine_reset_funcs(adev); - return 0; } @@ -1447,6 +1453,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) /* Initialize guilty flags for GFX and PAGE queues */ adev->sdma.instance[i].gfx_guilty = false; adev->sdma.instance[i].page_guilty = false; + adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1678,11 +1685,12 @@ static int sdma_v4_4_2_reset_queue(struct 
amdgpu_ring *ring, unsigned int vmid) return r; } -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + u32 instance_id = GET_INST(SDMA0, ring->me); u32 inst_mask; uint64_t rptr; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1715,11 +1723,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return 0; } -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { - int i; + struct amdgpu_device *adev = ring->adev; u32 inst_mask; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + int i; inst_mask = 1 << ring->me; udelay(50); @@ -1740,8 +1748,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instan } static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { - .pre_reset = sdma_v4_4_2_stop_queue, - .post_reset = sdma_v4_4_2_restore_queue, }; static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) -- GitLab From 9315860d05aa2604045a5d8f874cf1d36a70da89 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 14 Apr 2025 10:45:12 -0400 Subject: [PATCH 340/899] drm/amdkfd: fix NULL check mistake for process smi event The mistake will lead to NULL kernel oops, so fix it. 
Fixes: 4172b556fd5b ("drm/amdkfd: add smi events for process start and end") Signed-off-by: Eric Huang Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 727a4ce29fe63..c27fd7aec1c3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -350,7 +350,7 @@ void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) struct amdgpu_task_info *task_info; struct amdgpu_vm *avm; - if (pdd->drm_priv) + if (!pdd->drm_priv) return; avm = drm_priv_to_vm(pdd->drm_priv); -- GitLab From 987718c5598a3044ecca1b6f774900df12927ed5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Apr 2025 15:49:51 -0400 Subject: [PATCH 341/899] drm/amdgpu/userq: move runpm handling into core userq code Pull it out of the MES code and into the generic code. It's not MES specific and needs to be applied to all user queues regardless of the backend. 
Reviewed-by: Sunil Khatri Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 15 --------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index aa7222137c31d..1867520ba258e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -23,6 +23,8 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" @@ -257,6 +259,10 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; } @@ -280,6 +286,13 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) return -EINVAL; } + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue create\n"); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + /* * There could be a situation that we are creating a new queue while * the other queues under this UQ_mgr are suspended. 
So if there is any diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index e3c3fc160b799..34f8f63747d59 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -25,7 +25,6 @@ #include "amdgpu_gfx.h" #include "mes_userqueue.h" #include "amdgpu_userq_fence.h" -#include #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -294,12 +293,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, queue->userq_prop = userq_props; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) { - dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue mqd create\n"); - goto deference_pm; - } - r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); if (r) { DRM_ERROR("Failed to initialize MQD for userqueue\n"); @@ -327,9 +320,6 @@ free_ctx: free_mqd: amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); -deference_pm: - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); free_props: kfree(userq_props); @@ -341,14 +331,9 @@ static void mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - struct amdgpu_device *adev = uq_mgr->adev; - amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } const struct amdgpu_userq_funcs userq_mes_funcs = { -- GitLab From 8ff7c78baeeab746432934f7de82d56706c00c50 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 12 Apr 2025 17:39:32 +0300 Subject: [PATCH 342/899] drm/amdgpu: Fix double free in amdgpu_userq_fence_driver_alloc() The goto frees "fence_drv" so this is a double free bug. 
There is no need to call amdgpu_seq64_free(adev, fence_drv->va) since the seq64 allocation failed so change the goto to goto free_fence_drv. Also propagate the error code from amdgpu_seq64_alloc() instead of hard coding it to -ENOMEM. Fixes: e7cf21fbb277 ("drm/amdgpu: Few optimization and fixes for userq fence driver") Reviewed-by: Arvind Yadav Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index ee73d7846c2ca..e2e0bd6ae807e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -84,11 +84,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, /* Acquire seq64 memory */ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, &fence_drv->cpu_addr); - if (r) { - kfree(fence_drv); - r = -ENOMEM; - goto free_seq64; - } + if (r) + goto free_fence_drv; memset(fence_drv->cpu_addr, 0, sizeof(u64)); -- GitLab From 0e023c327b30ff306a7c08165a1ab9879c268226 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 12 Apr 2025 17:39:43 +0300 Subject: [PATCH 343/899] drm/amdgpu: Clean up error handling in amdgpu_userq_fence_driver_alloc() 1) Checkpatch complains if we print an error message for kzalloc() failure. The kzalloc() failure already has its own error messages built in. Also this allocation is small enough that it is guaranteed to succeed. 2) Return directly instead of doing a goto free_fence_drv. The "fence_drv" is already NULL so no cleanup is necessary.
Reviewed-by: Arvind Yadav Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index e2e0bd6ae807e..0a3032e01c342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -75,11 +75,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, int r; fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); - if (!fence_drv) { - DRM_ERROR("Failed to allocate memory for fence driver\n"); - r = -ENOMEM; - goto free_fence_drv; - } + if (!fence_drv) + return -ENOMEM; /* Acquire seq64 memory */ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, -- GitLab From 34c86a0f4487915cf532affe6fedc7ed100378e6 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:10:55 -0400 Subject: [PATCH 344/899] drm/amdgpu: rename function to follow naming convention in dce110 The prefix dce110 is used on all functions except init_pipes() and init_hw(). Under DCN, these same functions are prefixed. Let's keep things consistent.
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 778b68ec489e0..6fe26094addd7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2758,12 +2758,12 @@ static void dce110_enable_per_frame_crtc_position_reset( } -static void init_pipes(struct dc *dc, struct dc_state *context) +static void dce110_init_pipes(struct dc *dc, struct dc_state *context) { // Do nothing } -static void init_hw(struct dc *dc) +static void dce110_init_hw(struct dc *dc) { int i; struct dc_bios *bp; @@ -3322,7 +3322,7 @@ void dce110_disable_link_output(struct dc_link *link, static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, - .init_hw = init_hw, + .init_hw = dce110_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, .post_unlock_program_front_end = dce110_post_unlock_program_front_end, @@ -3366,7 +3366,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { }; static const struct hwseq_private_funcs dce110_private_funcs = { - .init_pipes = init_pipes, + .init_pipes = dce110_init_pipes, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, -- GitLab From 66f6ea421a8031e099920adfa2c57db54bd9af2b Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:10:56 -0400 Subject: [PATCH 345/899] drm/amdgpu: add missing parameter name in dce110_clk_src_construct() declaration While not needed per speaking, all the other parameters have names but this one. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h index 0721ae895ae9a..94128f7a18b18 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h @@ -257,7 +257,7 @@ bool dce110_clk_src_construct( struct dce110_clk_src *clk_src, struct dc_context *ctx, struct dc_bios *bios, - enum clock_source_id, + enum clock_source_id id, const struct dce110_clk_src_regs *regs, const struct dce110_clk_src_shift *cs_shift, const struct dce110_clk_src_mask *cs_mask); -- GitLab From 3d5d0d35a780aac31fc586edbccbe5a55a0bfdb1 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:10:57 -0400 Subject: [PATCH 346/899] drm/amdgpu: fix typo in atombios.h "aligned" not "aligend" Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/atombios.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index b78360a71bc9a..52bac19fb4049 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -4308,7 +4308,7 @@ typedef struct _ATOM_DPCD_INFO // note2: From RV770, the memory is more than 32bit addressable, so we will change // ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains // exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware -// (in offset to start of memory address) is KB aligned instead of byte aligend. +// (in offset to start of memory address) is KB aligned instead of byte aligned. 
// Note3: /* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter, -- GitLab From 5e272fb5eca998f67a6678331713fe643fa7b7e2 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:10:58 -0400 Subject: [PATCH 347/899] drm/radeon: fix typo in atombios.h "aligned" not "aligend" Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 2db40789235cb..a7caac5b8ac8a 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -3825,8 +3825,7 @@ typedef struct _ATOM_DPCD_INFO // note2: From RV770, the memory is more than 32bit addressable, so we will change // ucTableFormatRevision=1,ucTableContentRevision=4, the structure remains // exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware -// (in offset to start of memory address) is KB aligned instead of byte aligend. -/***********************************************************************************/ +// (in offset to start of memory address) is KB aligned instead of byte aligned. // Note3: /* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter, for CAIL, The size of FB access area is known, only thing missing is the Offset of FB Access area, so we can have: -- GitLab From f82e7cf5f5a9cec0770c63891febf33d8cd3de04 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:10:59 -0400 Subject: [PATCH 348/899] drm/amdgpu: fix duplicated value setting in dce100_resource_construct() i2c_speed_in_khz was set twice with the same values. Looking at other DCE versions, we probably wanted to set the value for i2c_speed_in_khz_hdcp. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index e698543ec9374..e0c64861eff38 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -1069,7 +1069,7 @@ static bool dce100_resource_construct( pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; dc->caps.max_cursor_size = 128; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dual_link_dvi = true; -- GitLab From 85207abb401bbaffa5ef1737af660f28afac60a6 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Mon, 7 Apr 2025 22:11:00 -0400 Subject: [PATCH 349/899] drm/amdgpu: fix typo in bios_parser.c Probably a cut and paste error from using get_integrated_info_v8's comment. 
This has to be get_integrated_info_v9 Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 3bacf470f7c5b..67f08495b7e6e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2384,10 +2384,10 @@ static enum bp_result get_integrated_info_v8( } /* - * get_integrated_info_v8 + * get_integrated_info_v9 * * @brief - * Get V8 integrated BIOS information + * Get V9 integrated BIOS information * * @param * bios_parser *bp - [in]BIOS parser handler to get master data table -- GitLab From 00ec6732a9ef5af89fb7f7e71604d5227bbdc4de Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sun, 13 Apr 2025 16:51:21 -0400 Subject: [PATCH 350/899] drm/amdgpu: add missing DCE6 to dce_version_to_string() Missing DCE 6.0 6.1 and 6.4 are identified as UNKNOWN. Fix this. 
Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 72b87b78da0e7..7217de2588511 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -689,6 +689,12 @@ void reg_sequence_wait_done(const struct dc_context *ctx) char *dce_version_to_string(const int version) { switch (version) { + case DCE_VERSION_6_0: + return "DCE 6.0"; + case DCE_VERSION_6_1: + return "DCE 6.1"; + case DCE_VERSION_6_4: + return "DCE 6.4"; case DCE_VERSION_8_0: return "DCE 8.0"; case DCE_VERSION_8_1: -- GitLab From 6b9d26089f56fb05c1f13027bfff53a63ba6f64d Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 14 Apr 2025 11:19:01 -0400 Subject: [PATCH 351/899] drm/amdkfd: fix a bug of smi event for superuser rocm-smi with superuser permission doesn't show some of smi events, i.e. page fault/migration, because the condition of "(events & all)" is false. Superuser should be able to detect all events, the condition of "(events & all)" seems redundant, so removing it will fix the issue.
Signed-off-by: Eric Huang Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index c27fd7aec1c3b..83d9384ac8156 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -163,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep) static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client, unsigned int event) { - uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS); uint64_t events = READ_ONCE(client->events); - if (pid && client->pid != pid && !(client->suser && (events & all))) + if (pid && client->pid != pid && !client->suser) return false; return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); -- GitLab From ac9984cee7e17fad030708f6462f272cf82a5f74 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 13 Apr 2025 10:16:58 -0400 Subject: [PATCH 352/899] drm/amdgpu/gfx11: properly reference EOP interrupts for userqs Regardless of whether we disable kernel queues, we need to take an extra reference to the pipe interrupts for user queues to make sure they stay enabled in case we disable them for kernel queues. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ac90f823e596c..a528afe38e0b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4832,10 +4832,10 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, bool enable) { - if (adev->gfx.disable_kq) { - unsigned int irq_type; - int m, p, r; + unsigned int irq_type; + int m, p, r; + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { for (m = 0; m < adev->gfx.me.num_me; m++) { for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; @@ -4849,7 +4849,9 @@ static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, return r; } } + } + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { for (m = 0; m < adev->gfx.mec.num_mec; ++m) { for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4866,6 +4868,7 @@ static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, } } } + return 0; } -- GitLab From e10414cf2e55fd9db7a72862ede04e22fe0c0caf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 13 Apr 2025 10:19:24 -0400 Subject: [PATCH 353/899] drm/amdgpu/gfx12: properly reference EOP interrupts for userqs Regardless of whether we disable kernel queues, we need to take an extra reference to the pipe interrupts for user queues to make sure they stay enabled in case we disable them for kernel queues. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f347921fa9097..4c8958f91edae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3680,10 +3680,10 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, bool enable) { - if (adev->gfx.disable_kq) { - unsigned int irq_type; - int m, p, r; + unsigned int irq_type; + int m, p, r; + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { for (m = 0; m < adev->gfx.me.num_me; m++) { for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; @@ -3697,7 +3697,9 @@ static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, return r; } } + } + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { for (m = 0; m < adev->gfx.mec.num_mec; ++m) { for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -3714,6 +3716,7 @@ static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, } } } + return 0; } -- GitLab From 8ae634f10e67ee61b8462cdd95661048271c49bf Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Thu, 10 Apr 2025 11:56:47 +0530 Subject: [PATCH 354/899] drm/amdgpu: Update vcn doorbell range in NBIO 7.9 Increase vcn doorbell range for gfx950 to 11. 
Signed-off-by: Shiwu Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index f23cb79110d61..a376f072700dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -177,8 +177,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do { u32 doorbell_range = 0, doorbell_ctrl = 0; u32 aid_id = instance; + u32 range_size; if (use_doorbell) { + range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)) ? + 0xb : 0x9; doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_OFFSET_ENTRY, @@ -186,7 +190,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x9); + range_size); if (aid_id) doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -204,7 +208,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); + S2A_DOORBELL_PORT1_RANGE_SIZE, range_size); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); -- GitLab From b574729ff00d0b5dfdabe3b9e8bf52920192d9b4 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Thu, 10 Apr 2025 11:58:16 +0530 Subject: [PATCH 355/899] drm/amdgpu: Enable doorbell for JPEG5_0_1 Enable doorbell for JPEG5_0_1 and adjust index for VCN5_0_1. 
Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 218e16b68f1de..6f73033d78b5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -156,7 +156,7 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - ring->use_doorbell = false; + ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); if (!amdgpu_sriov_vf(adev)) { ring->doorbell_index = @@ -264,7 +264,7 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, - (ring->pipe ? 
(ring->pipe - 0x15) : 0), + ring->pipe, ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | VCN_JPEG_DB_CTRL__EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d95..4b2e6a033831b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -126,7 +126,7 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); @@ -213,7 +213,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) if (ring->use_doorbell) adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst), + 11 * vcn_inst), adev->vcn.inst[i].aid_id); /* Re-init fw_shared, if required */ -- GitLab From 2c8b0d628a9904190532b4137be9e645421eb4cf Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Sat, 12 Apr 2025 11:08:01 +0800 Subject: [PATCH 356/899] drm/amd/pm: Enable host limit metrics support Enable host limit metrics support for smuv_13_0_6 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 34ffaa0cfeeb0..3f61c94ea2f84 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -386,6 +386,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); if (fw_ver < 
0x00555600) smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + if ((pgm == 7 && fw_ver >= 0x7550E00) || + (pgm == 0 && fw_ver >= 0x00557E00)) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || -- GitLab From 1197cfb73083dbe31a8964bacad7932178430257 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 13 Apr 2025 10:31:08 -0400 Subject: [PATCH 357/899] drm/amdgpu/sdma6: properly reference trap interrupts for userqs We need to take a reference to the interrupts to make sure they stay enabled even if the kernel queues have disabled them. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 2249a1ef057bf..c3d53974e7f53 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,11 +1377,39 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v6_0_start(adev); + r = sdma_v6_0_start(adev); + if (r) + return r; + + return sdma_v6_0_set_userq_trap_interrupts(adev, true); } static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1393,6 +1421,7 @@ static int sdma_v6_0_hw_fini(struct 
amdgpu_ip_block *ip_block) sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); + sdma_v6_0_set_userq_trap_interrupts(adev, false); return 0; } -- GitLab From 0ed032dc7de5f069cc409fd81abd267ced854b35 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 13 Apr 2025 10:32:23 -0400 Subject: [PATCH 358/899] drm/amdgpu/sdma7: properly reference trap interrupts for userqs We need to take a reference to the interrupts to make sure they stay enabled even if the kernel queues have disabled them. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 4f66b81110bbb..e1a6b15338506 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1353,11 +1353,39 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v7_0_start(adev); + r = sdma_v7_0_start(adev); + if (r) + return r; + + return sdma_v7_0_set_userq_trap_interrupts(adev, true); } static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1369,6 +1397,7 @@ static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v7_0_ctx_switch_enable(adev, false); sdma_v7_0_enable(adev, false); + 
sdma_v7_0_set_userq_trap_interrupts(adev, false); return 0; } -- GitLab From 172494c4e9f897f0f499a4a86a812dee7b617358 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Sat, 12 Apr 2025 11:14:32 +0800 Subject: [PATCH 359/899] drm/amd/pm: Enable host limit metrics support Enable host limit metrics support for smuv_13_0_12 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3f61c94ea2f84..6d84257b53014 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -337,6 +337,8 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver >= 0x00561E00) smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + if (fw_ver >= 0x00562500) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); } static void smu_v13_0_6_init_caps(struct smu_context *smu) -- GitLab From fb20954c9717d1d07d8b8b8f34ac2a2755aec5ff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Apr 2025 14:18:03 -0400 Subject: [PATCH 360/899] drm/amdgpu/userq: rework driver parameter Replace disable_kq parameter with user_queue parameter. The parameter has the following logic: -1 = auto (ASIC specific default) 0 = user queues disabled 1 = user queues enabled and kernel queues enabled (if supported) 2 = user queues enabled and kernel queues disabled The default behavior (-1) is currently the same as 0 for current ASICs. To enable user queues (in addition to kernel queues) set user_queue=1. To enable user queues and disable kernel queues (to make all resources available to user queues), set user_queue=2. 
Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 15 +++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 20 +++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 18 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 18 ++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 18 ++++++++++++++++-- 8 files changed, 77 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b156e31ac86ac..3212fd78b0129 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -271,7 +271,7 @@ extern int amdgpu_agp; extern int amdgpu_rebar; extern int amdgpu_wbrf; -extern int amdgpu_disable_kq; +extern int amdgpu_user_queue; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a117cd95b9dcd..e24b0c730baf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -242,7 +242,7 @@ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; int amdgpu_rebar = -1; /* auto */ -int amdgpu_disable_kq = -1; +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1114,12 +1114,15 @@ MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = en module_param_named(rebar, amdgpu_rebar, int, 0444); /** - * DOC: disable_kq (int) - * Disable kernel queues on systems that support user queues. - * (0 = kernel queues enabled, 1 = kernel queues disabled, -1 = auto (default setting)) + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. 
+ * -1 = auto (ASIC specific default) + * 0 = user queues disabled + * 1 = user queues enabled and kernel queues enabled (if supported) + * 2 = user queues enabled and kernel queues disabled */ -MODULE_PARM_DESC(disable_kq, "Disable kernel queues (-1 = auto (default), 0 = enable KQ, 1 = disable KQ)"); -module_param_named(disable_kq, amdgpu_disable_kq, int, 0444); +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index caaddab31023f..91dd365cb1e6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -487,6 +487,7 @@ struct amdgpu_gfx { struct mutex workload_profile_mutex; bool disable_kq; + bool disable_uq; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index a6c8f07a0da4b..59778b978eb57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -138,6 +138,7 @@ struct amdgpu_sdma { uint32_t supported_reset; struct list_head reset_callback_list; bool no_user_submission; + bool disable_uq; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a528afe38e0b6..70c38e53e6918 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1632,7 +1632,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = 
&userq_mes_funcs; } @@ -1646,7 +1646,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } @@ -5211,8 +5211,22 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } adev->gfx.funcs = &gfx_v11_0_gfx_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 4c8958f91edae..1523f5b352c71 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1418,7 +1418,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) { + if (0 && !adev->gfx.disable_uq) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; } @@ -3819,8 +3819,22 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } adev->gfx.funcs = &gfx_v12_0_gfx_funcs; diff --git 
a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c3d53974e7f53..6bb36187a53dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1269,8 +1269,22 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) @@ -1351,7 +1365,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add firmware version checks here */ - if (0) + if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index e1a6b15338506..943c6446a0a73 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1254,8 +1254,22 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - if (amdgpu_disable_kq == 1) + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { @@ -1326,7 +1340,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ /* add 
firmware version checks here */ - if (0) + if (0 && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif -- GitLab From 5ae4591f4ea51519934a14aac537a28f348a8a56 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Tue, 8 Apr 2025 20:25:43 +0800 Subject: [PATCH 361/899] drm/amdgpu: Clear overflow for SRIOV For VF, it doesn't have the permission to clear overflow, clear the bit by reset. Signed-off-by: Emily Deng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 15 +++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 +++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 901f8b12c672d..30f16968b5788 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_reset.h" /** * amdgpu_ih_ring_init - initialize the IH state @@ -227,13 +228,23 @@ restart_ih: ih->rptr &= ih->ptr_mask; } - amdgpu_ih_set_rptr(adev, ih); + if (!ih->overflow) + amdgpu_ih_set_rptr(adev, ih); + wake_up_all(&ih->wait_process); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev, ih); if (wptr != ih->rptr) - goto restart_ih; + if (!ih->overflow) + goto restart_ih; + + if (ih->overflow) + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index b0a88f92cd821..7f7ea046e2097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,7 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. 
*/ wait_queue_head_t wait_process; uint64_t processed_timestamp; + bool overflow; }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index eb4185dcbd1d4..5900b560b7dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } /* update doorbell range for ih ring 0 */ @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). Hopefully diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index faa0dd75dd6d5..85846fd08ce4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } if (!amdgpu_sriov_vf(adev)) @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). 
Hopefully -- GitLab From 56a0a80af043eadb4af86ab426b3c00624acb832 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Apr 2025 14:16:41 -0400 Subject: [PATCH 362/899] drm/amdgpu/userq: track the xcp_id associated with the queue Track this to align with KFD for enforce isolation handling. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index 381b9c6f0573d..db79141e1c1e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ -56,6 +56,7 @@ struct amdgpu_usermode_queue { struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; + u32 xcp_id; }; struct amdgpu_userq_funcs { -- GitLab From 94976e7e5ede65f9dfad669b1ea7170320f08399 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Apr 2025 13:26:43 -0400 Subject: [PATCH 363/899] drm/amdgpu/userq: add helpers to start/stop scheduling This will be used to stop/start user queue scheduling for example when switching between kernel and user queues when enforce isolation is enabled. 
v2: use idx v3: only stop compute/gfx queues Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 86 +++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 5 ++ 3 files changed, 84 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3212fd78b0129..68410ba1b9446 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1249,6 +1249,7 @@ struct amdgpu_device { struct list_head userq_mgr_list; struct mutex userq_mutex; + bool userq_halt_for_enforce_isolation; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 1867520ba258e..e944d05685dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -275,6 +275,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; + bool skip_map_queue; uint64_t index; int qid, r = 0; @@ -348,6 +349,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } + qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); if (qid < 0) { DRM_ERROR("Failed to allocate a queue id\n"); @@ -358,15 +360,28 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - r = uq_funcs->map(uq_mgr, queue); - if (r) { - DRM_ERROR("Failed to map Queue\n"); - idr_remove(&uq_mgr->userq_idr, qid); - amdgpu_userq_fence_driver_free(queue); - uq_funcs->mqd_destroy(uq_mgr, queue); - kfree(queue); - goto unlock; + /* don't map the queue if scheduling is halted */ + mutex_lock(&adev->userq_mutex); + if 
(adev->userq_halt_for_enforce_isolation && + ((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) + skip_map_queue = true; + else + skip_map_queue = false; + if (!skip_map_queue) { + r = uq_funcs->map(uq_mgr, queue); + if (r) { + mutex_unlock(&adev->userq_mutex); + DRM_ERROR("Failed to map Queue\n"); + idr_remove(&uq_mgr->userq_idr, qid); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + goto unlock; + } } + mutex_unlock(&adev->userq_mutex); + args->out.queue_id = qid; @@ -733,3 +748,58 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) mutex_unlock(&adev->userq_mutex); return ret; } + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + if (adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already stopped!\n"); + adev->userq_halt_for_enforce_isolation = true; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + userq_funcs = adev->userq_funcs[queue->queue_type]; + ret |= userq_funcs->unmap(uqm, queue); + } + } + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + if (!adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already started!\n"); + 
adev->userq_halt_for_enforce_isolation = false; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + userq_funcs = adev->userq_funcs[queue->queue_type]; + ret |= userq_funcs->map(uqm, queue); + } + } + } + mutex_unlock(&adev->userq_mutex); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index db79141e1c1e0..0701f33e6740f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ -115,4 +115,9 @@ uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, int amdgpu_userq_suspend(struct amdgpu_device *adev); int amdgpu_userq_resume(struct amdgpu_device *adev); +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); + #endif -- GitLab From 28fc3172e4204c8cdd8c70226ea02b0ae9930b69 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Feb 2025 15:20:45 -0500 Subject: [PATCH 364/899] drm/amdgpu: rename enforce isolation variables Since they will be used for both KFD and KGD user queues, rename them from kfd to userq. No intended functional change. 
Acked-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 32 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e966aefc2b0f3..b96e0613ea7e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4368,7 +4368,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_sync_create(&adev->isolation[i].active); amdgpu_sync_create(&adev->isolation[i].prev); } - mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.userq_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); mutex_init(&adev->userq_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 2c933d436e564..c58d32983c45e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1947,39 +1947,40 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, bool enable) { - mutex_lock(&adev->gfx.kfd_sch_mutex); + mutex_lock(&adev->gfx.userq_sch_mutex); if (enable) { /* If the count is already 0, it means there's an imbalance bug somewhere. * Note that the bug may be in a different caller than the one which triggers the * WARN_ON_ONCE. 
*/ - if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) { dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); goto unlock; } - adev->gfx.kfd_sch_req_count[idx]--; + adev->gfx.userq_sch_req_count[idx]--; - if (adev->gfx.kfd_sch_req_count[idx] == 0 && - adev->gfx.kfd_sch_inactive[idx]) { + if (adev->gfx.userq_sch_req_count[idx] == 0 && + adev->gfx.userq_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { - if (adev->gfx.kfd_sch_req_count[idx] == 0) { + if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); - if (!adev->gfx.kfd_sch_inactive[idx]) { - amdgpu_amdkfd_stop_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = true; + if (!adev->gfx.userq_sch_inactive[idx]) { + if (adev->kfd.init_complete) + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.userq_sch_inactive[idx] = true; } } - adev->gfx.kfd_sch_req_count[idx]++; + adev->gfx.userq_sch_req_count[idx]++; } unlock: - mutex_unlock(&adev->gfx.kfd_sch_mutex); + mutex_unlock(&adev->gfx.userq_sch_mutex); } /** @@ -2024,12 +2025,11 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ - if (adev->kfd.init_complete) { - WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); - WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); + if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; - } + adev->gfx.userq_sch_inactive[idx] = false; } mutex_unlock(&adev->enforce_isolation_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 91dd365cb1e6c..ed54095e6ad69 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -475,9 +475,9 @@ struct amdgpu_gfx { bool enable_cleaner_shader; struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; /* Mutex for synchronizing KFD scheduler operations */ - struct mutex kfd_sch_mutex; - u64 kfd_sch_req_count[MAX_XCP]; - bool kfd_sch_inactive[MAX_XCP]; + struct mutex userq_sch_mutex; + u64 userq_sch_req_count[MAX_XCP]; + bool userq_sch_inactive[MAX_XCP]; unsigned long enforce_isolation_jiffies[MAX_XCP]; unsigned long enforce_isolation_time[MAX_XCP]; -- GitLab From 8f23a97907d92e16d34d4a2a9dc793398afa34a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Apr 2025 13:11:10 -0400 Subject: [PATCH 365/899] drm/amdgpu/userq: integrate with enforce isolation Enforce isolation serializes access to the GFX IP. User queues are isolated in the MES scheduler, but we still need to serialize between kernel queues and user queues. For enforce isolation, group KGD user queues with KFD user queues. 
v2: split out variable renaming, add config guards v3: use new function names Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c58d32983c45e..e1dca45a152b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1970,6 +1970,9 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); if (!adev->gfx.userq_sch_inactive[idx]) { +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx); +#endif if (adev->kfd.init_complete) amdgpu_amdkfd_stop_sched(adev, idx); adev->gfx.userq_sch_inactive[idx] = true; @@ -2027,6 +2030,9 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) /* Tell KFD to resume the runqueue */ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + amdgpu_userq_start_sched_for_enforce_isolation(adev, idx); +#endif if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); adev->gfx.userq_sch_inactive[idx] = false; -- GitLab From 6027cbee190049629d3028789659e0763562e21e Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Mon, 14 Apr 2025 11:14:39 +0800 Subject: [PATCH 366/899] drm/amd/display: Add error check for avi and vendor infoframe setup function The function fill_stream_properties_from_drm_display_mode() calls the function drm_hdmi_avi_infoframe_from_display_mode() and the function drm_hdmi_vendor_infoframe_from_display_mode(), but does not check its return value. Log the error messages to prevent silent failure if either function fails. 
Signed-off-by: Wentao Liang Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 58e758732338a..d3a2567fc5b86 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6437,6 +6437,7 @@ static void fill_stream_properties_from_drm_display_mode( struct amdgpu_dm_connector *aconnector = NULL; struct hdmi_vendor_infoframe hv_frame; struct hdmi_avi_infoframe avi_frame; + ssize_t err; if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) aconnector = to_amdgpu_dm_connector(connector); @@ -6483,9 +6484,17 @@ static void fill_stream_properties_from_drm_display_mode( } if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { - drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in); + err = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_err(connector->dev, "Failed to setup avi infoframe: %zd\n", err); timing_out->vic = avi_frame.video_code; - drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); + err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_err(connector->dev, "Failed to setup vendor infoframe: %zd\n", err); timing_out->hdmi_vic = hv_frame.vic; } -- GitLab From fced8e7d2ddeba7f41b19e065f8c02a9abf9ac00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:54:27 -0500 Subject: [PATCH 367/899] drm/amdgpu: convert userq UAPI _pad to flags Reuse the _pad field for flags. 
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 4 ++-- include/uapi/drm/amdgpu_drm.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index e944d05685dde..9a7ac85ff01c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -399,7 +399,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: - if (args->in._pad) + if (args->in.flags) return -EINVAL; r = amdgpu_userqueue_create(filp, args); if (r) @@ -410,7 +410,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, if (args->in.ip_type || args->in.doorbell_handle || args->in.doorbell_offset || - args->in._pad || + args->in.flags || args->in.queue_va || args->in.queue_size || args->in.rptr_va || diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ef97c0d78b8a0..1a451907184cc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -355,7 +355,10 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - __u32 _pad; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. 
-- GitLab From 024cc8a71aac8194fc2883782d36cbad6a9fe36b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:55:36 -0500 Subject: [PATCH 368/899] drm/amdgpu/userq: add UAPI for setting queue priority Allow the user to set queue priority levels: 0 - normal low - most apps (maps to MES AMD_PRIORITY_LEVEL_NORMAL) 1 - low - background jobs (maps to MES AMD_PRIORITY_LEVEL_LOW) 2 - normal high - apps that need relative high (maps to MES AMD_PRIORITY_LEVEL_MEDIUM) 3 - high (admin only - for compositors) (maps to MES AMD_PRIORITY_LEVEL_HIGH) Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a451907184cc..267ed4adcfb99 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -329,6 +329,15 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ + /* * This structure is a container to pass input configuration * info for all supported userqueue related operations. -- GitLab From 3d0a402e7cd11496c40e03b15828c054a70f6fbd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:27:47 -0500 Subject: [PATCH 369/899] drm/amdgpu/mes11: add conversion for priority levels Convert driver priority levels to MES11 priority levels. At the moment they are the same, but they may not always be.
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f7aa45775eadb..0a5b7a296f08d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -287,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -310,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; -- GitLab From a83be6e4798efbfd3f6ca511806fbaab73a4d7e7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:29:15 -0500 Subject: [PATCH 370/899] drm/amdgpu/mes12: add conversion for priority levels Convert driver priority levels to MES12 priority levels.
At the moment they are the same, but they may not always be. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b0e042a4cea19..1f7614dccb005 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -274,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -297,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; -- GitLab From 9546c05628a7782508ce4b779b4b00c2276b9e38 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:38:08 -0500 Subject: [PATCH 371/899] drm/amdgpu/userq: add priority to user queue structure So we can track this when
we create user queues. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h index 0701f33e6740f..b2da513b3d021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h @@ -57,6 +57,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; u32 xcp_id; + int priority; }; struct amdgpu_userq_funcs { -- GitLab From 23a650bb9f248133095a2e4d7ba7bcbdba9798ab Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:47:11 -0500 Subject: [PATCH 372/899] drm/amdgpu/userq/mes: handle user queue priority Handle the queue priority set by the user. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 34f8f63747d59..3b45ab19fc3f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -96,6 +96,21 @@ mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return 0; } +static int convert_to_mes_priority(int priority) +{ + switch (priority) { + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW: + default: + return AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW: + return AMDGPU_MES_PRIORITY_LEVEL_LOW; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_HIGH; + } +} + static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { 
@@ -121,7 +136,7 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.process_context_addr = ctx->gpu_addr; queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority); queue_input.process_id = queue->vm->pasid; queue_input.queue_type = queue->queue_type; -- GitLab From a5c34299d866f099c85567117717c18842e67da8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 22:56:26 -0500 Subject: [PATCH 373/899] drm/amdgpu/userq: enable support for queue priorities Enable users to create queues at different priority levels. The highest level is restricted to drm master. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 9a7ac85ff01c0..149993ec537b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -22,6 +22,7 @@ * */ +#include #include #include @@ -266,6 +267,21 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) return r; } +static int amdgpu_userq_priority_permit(struct drm_file *filp, + int priority) +{ + if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) { @@ -278,6 +294,9 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) bool skip_map_queue; uint64_t index; int qid, r = 0; + int priority = + (args->in.flags 
& AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; /* Usermode queues are only supported for GFX IP as of now */ if (args->in.ip_type != AMDGPU_HW_IP_GFX && @@ -287,6 +306,10 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) return -EINVAL; } + r = amdgpu_userq_priority_permit(filp, priority); + if (r) + return r; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue create\n"); @@ -319,6 +342,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; + queue->priority = priority; db_info.queue_type = queue->queue_type; db_info.doorbell_handle = queue->doorbell_handle; @@ -399,7 +423,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: - if (args->in.flags) + if (args->in.flags & ~AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) return -EINVAL; r = amdgpu_userqueue_create(filp, args); if (r) -- GitLab From b22659d5d3520c82295981797e75de525365a411 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Fri, 11 Apr 2025 14:48:52 +0800 Subject: [PATCH 374/899] drm/amdgpu: switch amdgpu_sdma_reset_engine to use the new sdma function pointers Replace old callback mechanism with direct calls to stop/start functions. 
Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 34 +++--------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index c3c6f03190c89..7139d574c23e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -559,16 +559,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * @adev: Pointer to the AMDGPU device * @instance_id: ID of the SDMA engine instance to reset * - * This function performs the following steps: - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. - * 2. Resets the specified SDMA engine instance. - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. - * * Returns: 0 on success, or a negative error code on failure. 
*/ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) { - struct sdma_on_reset_funcs *funcs; int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; @@ -590,18 +584,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) page_sched_stopped = true; } - /* Invoke all registered pre_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->pre_reset) { - ret = funcs->pre_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "beforeReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->stop_kernel_queue) + sdma_instance->funcs->stop_kernel_queue(gfx_ring); /* Perform the SDMA reset for the specified instance */ ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); @@ -610,18 +594,8 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) goto exit; } - /* Invoke all registered post_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->post_reset) { - ret = funcs->post_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "afterReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->start_kernel_queue) + sdma_instance->funcs->start_kernel_queue(gfx_ring); exit: /* Restart the scheduler's work queue for the GFX and page rings -- GitLab From 5c3e7c49538e2ddad10296a318c225bbb3d37d20 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Fri, 11 Apr 2025 15:26:18 +0800 Subject: [PATCH 375/899] drm/amdgpu: Implement SDMA soft reset directly for v5.x This patch introduces a new function `amdgpu_sdma_soft_reset` to handle SDMA soft resets directly, rather than relying on the DPM interface. 1. 
**New `amdgpu_sdma_soft_reset` Function**: - Implements a soft reset for SDMA engines by directly writing to the hardware registers. - Handles SDMA versions 4.x and 5.x separately: - For SDMA 4.x, the existing `amdgpu_dpm_reset_sdma` function is used for backward compatibility. - For SDMA 5.x, the driver directly manipulates the `GRBM_SOFT_RESET` register to reset the specified SDMA instance. 2. **Integration into `amdgpu_sdma_reset_engine`**: - The `amdgpu_sdma_soft_reset` function is called during the SDMA reset process, replacing the previous call to `amdgpu_dpm_reset_sdma`. v2: r should default to an error (Alex) Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 38 +++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 7139d574c23e6..f456a5a12bb76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -26,6 +26,8 @@ #include "amdgpu_sdma.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -554,6 +556,40 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); } +static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) +{ + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; + int r = -EOPNOTSUPP; + + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 4): + case IP_VERSION(4, 4, 5): + /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ + r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + break; + case IP_VERSION(5, 0, 0): + case 
IP_VERSION(5, 0, 1): + case IP_VERSION(5, 0, 2): + case IP_VERSION(5, 0, 5): + case IP_VERSION(5, 2, 0): + case IP_VERSION(5, 2, 2): + case IP_VERSION(5, 2, 4): + case IP_VERSION(5, 2, 5): + case IP_VERSION(5, 2, 6): + case IP_VERSION(5, 2, 3): + case IP_VERSION(5, 2, 1): + case IP_VERSION(5, 2, 7): + if (sdma_instance->funcs->soft_reset_kernel_queue) + r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); + break; + default: + break; + } + + return r; +} + /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device @@ -588,7 +624,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) sdma_instance->funcs->stop_kernel_queue(gfx_ring); /* Perform the SDMA reset for the specified instance */ - ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); goto exit; -- GitLab From e56d4bf57fabe009494e9b81ba64e21ebe7d35c2 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Mon, 14 Apr 2025 16:05:33 +0800 Subject: [PATCH 376/899] drm/amdgpu/: drm/amdgpu: Register the new sdma function pointers for sdma_v5_0 Register stop/start/soft_reset queue functions for SDMA IP versions v5.0. 
Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 78 +++++++++++++++++++------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index e1348b6d9c6a6..48ee6dc1e3dcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -112,6 +112,8 @@ static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring); static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), @@ -1323,6 +1325,36 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) +{ + u32 grbm_soft_reset; + u32 tmp; + + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} + +static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_0_stop_queue, + .start_kernel_queue = &sdma_v5_0_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine, +}; 
+ static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1365,6 +1397,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1506,8 +1539,16 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; + + return amdgpu_sdma_reset_engine(adev, inst_id); +} + +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) +{ + u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1562,30 +1603,25 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, 
soft_reset); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze*/ - freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); - r = sdma_v5_0_gfx_resume_instance(adev, i, true); - -err0: + r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } -- GitLab From 47454f2dc0bf6774c8dbe7226bcf50ba24a788a8 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Mon, 14 Apr 2025 16:06:51 +0800 Subject: [PATCH 377/899] drm/amdgpu: Register the new sdma function pointers for sdma_v5_2 Register stop/start/soft_reset queue functions for SDMA IP versions v5.2. 
Suggested-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 94 +++++++++++++++----------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 964f12afac9e3..fc95c2875ba59 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -113,6 +113,8 @@ static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -759,37 +761,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) return 0; } -static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) { - struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset; u32 tmp; - int i; - for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, 
mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} +static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma_v5_2_soft_reset_engine(adev, i); udelay(50); } return 0; } +static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_2_stop_queue, + .start_kernel_queue = &sdma_v5_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine, +}; + /** * sdma_v5_2_start - setup and start the async dma engines * @@ -1302,6 +1316,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1437,8 +1452,16 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; + + return amdgpu_sdma_reset_engine(adev, inst_id); +} + +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) +{ + u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1495,31 +1518,26 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); - /* soft reset 
SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze and unhalt */ - freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); - r = sdma_v5_2_gfx_resume_instance(adev, i, true); + r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); -err0: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } -- GitLab From 574f4b5562cc11da7beb8c14b51a846da8263f89 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Wed, 16 Apr 2025 14:03:02 +0800 Subject: [PATCH 378/899] drm/amdgpu: optimize queue reset and stop logic for sdma_v5_0 This patch refactors the SDMA v5.0 queue reset and stop logic to improve code readability, maintainability, and performance. The key changes include: 1. **Generalized `sdma_v5_0_gfx_stop` Function**: - Added an `inst_mask` parameter to allow stopping specific SDMA instances instead of all instances. 
This is useful for resetting individual queues. 2. **Simplified `sdma_v5_0_reset_queue` Function**: - Removed redundant loops and checks by directly using the `ring->me` field to identify the SDMA instance. - Reused the `sdma_v5_0_gfx_stop` function to stop the queue, reducing code duplication. v1: The general coding style is to declare variables like "i" or "r" last. E.g. longest lines first and shortest last. (Christian) Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 31 ++++++++------------------ 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 48ee6dc1e3dcc..9505ae96fbecc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -557,15 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_0_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers (NAVI10).
*/ -static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -657,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_0_gfx_stop(adev); + sdma_v5_0_gfx_stop(adev, 1 << inst_mask); sdma_v5_0_rlc_stop(adev); } @@ -1546,33 +1548,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) { - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, stat1_reg; + u32 f32_cntl, freeze, cntl, stat1_reg; struct amdgpu_device *adev = ring->adev; int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } - + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_0_gfx_stop(adev, 1 << i); /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze 
bit to 1 */ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); -- GitLab From 6a07ac702f022dc85b186e7d2bec6216a0c260b0 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Wed, 16 Apr 2025 14:13:30 +0800 Subject: [PATCH 379/899] drm/amdgpu: optimize queue reset and stop logic for sdma_v5_2 This patch refactors the SDMA v5.2 queue reset and stop logic to improve code readability, maintainability, and performance. The key changes include: 1. **Generalized `sdma_v5_2_gfx_stop` Function**: - Added an `inst_mask` parameter to allow stopping specific SDMA instances instead of all instances. This is useful for resetting individual queues. 2. **Simplified `sdma_v5_2_reset_queue` Function**: - Removed redundant loops and checks by directly using the `ring->me` field to identify the SDMA instance. - Reused the `sdma_v5_2_gfx_stop` function to stop the queue, reducing code duplication. v1: The general coding style is to declare variables like "i" or "r" last. E.g. longest lines first and shortest last. (Christian) Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 31 ++++++++------------------ 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index fc95c2875ba59..a6e612b4a8928 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -407,15 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_2_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers.
*/ -static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -506,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_2_gfx_stop(adev); + sdma_v5_2_gfx_stop(adev, inst_mask); sdma_v5_2_rlc_stop(adev); } @@ -1459,33 +1461,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) { - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, stat1_reg; + u32 f32_cntl, freeze, cntl, stat1_reg; struct amdgpu_device *adev = ring->adev; int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } - + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_2_gfx_stop(adev, 1 << i); /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 
1 */ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); -- GitLab From 9018c7fe68b55f7ca0df65f0c048dcb6acea90ed Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 17 Apr 2025 15:57:26 +0530 Subject: [PATCH 380/899] drm/amdgpu/userq: add context and seqno of the fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add context and seqno of the fence in error logging rather than printing fence ptr. Reviewed-by: Christian König Suggested-by: Pierre-Eric Pelloux-Prayer Suggested-by: Tvrtko Ursulin Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 149993ec537b2..5f87cc8b5bf4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -44,7 +44,8 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, if (f && !dma_fence_is_signaled(f)) { ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { - DRM_ERROR("Timed out waiting for fence f=%p\n", f); + DRM_ERROR("Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); return; } } @@ -654,7 +655,8 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) continue; ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { - DRM_ERROR("Timed out waiting for fence f=%p\n", f); + DRM_ERROR("Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); return -ETIMEDOUT; } } -- GitLab From 3f8b6d828210f872d4a84bd2e26440d3767f79fd Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 17 Apr 2025 15:00:44 +0530 Subject: [PATCH 381/899] drm/radeon: fix the warning for radeon_cs_parser_fini Fix the below warning message. 
radeon/radeon_cs.c:418: warning: Excess function parameter 'backoff' description in 'radeon_cs_parser_fini' Reviewed-by: Tvrtko Ursulin Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 64b26bfeafc95..b8e6202f1d5bc 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -409,7 +409,6 @@ static int cmp_size_smaller_first(void *priv, const struct list_head *a, * radeon_cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number - * @backoff: indicator to backoff the reservation * * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. -- GitLab From 2200b41428ee8d2298ec9d3fd2a22ba2d006096a Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Fri, 11 Apr 2025 15:30:23 +0800 Subject: [PATCH 382/899] drm/amdgpu:remove old sdma reset callback mechanism This patch removes the deprecated SDMA reset callback mechanism, which was previously used to register pre-reset and post-reset callbacks for SDMA engine resets. The callback mechanism has been replaced with a more direct and efficient approach using `stop_queue` and `start_queue` functions in the ring's function table. The SDMA reset callback mechanism allowed KFD and AMDGPU to register pre-reset and post-reset functions for handling SDMA engine resets. However, this approach added unnecessary complexity and was no longer needed after the introduction of the `stop_queue` and `start_queue` functions in the ring's function table. 1. **Remove Callback Mechanism**: - Removed the `amdgpu_sdma_register_on_reset_callbacks` function and its associated data structures (`sdma_on_reset_funcs`). - Removed the callback registration logic from the SDMA v4.4.2 initialization code. 2. 
**Clean Up Related Code**: - Removed the `sdma_v4_4_2_set_engine_reset_funcs` function, which was used to register the callbacks. - Removed the `sdma_v4_4_2_engine_reset_funcs` structure, which contained the pre-reset and post-reset callback functions. Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 25 ------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 8 -------- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 10 ---------- 3 files changed, 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index f456a5a12bb76..6716ac281c498 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -531,31 +531,6 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin return false; } -/** - * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks - * @adev: Pointer to the AMDGPU device - * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions - * - * This function allows KFD and AMDGPU to register their own callbacks for handling - * pre-reset and post-reset operations for engine reset. These are needed because engine - * reset will stop all queues on that engine. 
- */ -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs) -{ - if (!funcs) - return; - - /* Ensure the reset_callback_list is initialized */ - if (!adev->sdma.reset_callback_list.next) { - INIT_LIST_HEAD(&adev->sdma.reset_callback_list); - } - /* Initialize the list node in the callback structure */ - INIT_LIST_HEAD(&funcs->list); - - /* Add the callback structure to the global list */ - list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); -} - static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) { struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 59778b978eb57..5605921212f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -109,13 +109,6 @@ struct amdgpu_sdma_ras { struct amdgpu_ras_block_object ras_block; }; -struct sdma_on_reset_funcs { - int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id); - int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id); - /* Linked list node to store this structure in a list; */ - struct list_head list; -}; - struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; @@ -178,7 +171,6 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 83ebf437802af..9c169112a5e7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
@@ -106,7 +106,6 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); @@ -1358,7 +1357,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); - sdma_v4_4_2_set_engine_reset_funcs(adev); return 0; } @@ -1747,14 +1745,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) return sdma_v4_4_2_inst_start(adev, inst_mask, true); } -static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { -}; - -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) -{ - amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs); -} - static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, -- GitLab From d30f61076268fd7ce01e4ec9e4d84bfaf90365f7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 17 Apr 2025 21:05:35 +0530 Subject: [PATCH 383/899] drm/amdgpu: Refine Cleaner Shader MEC firmware version for GFX10.1.x GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the minimum firmware version for the Cleaner Shader in the gfx_v10_0_sw_init function. This change adjusts the minimum required firmware version for the MEC firmware from 152 to 151, allowing for broader compatibility with GFX10.1 GPUs. 
Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10") Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 00eb4cfecf8f4..e140f673d25a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4828,7 +4828,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { -- GitLab From 94a62b0f573f868f6f706d96c8c577c2e9b309e0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 17:12:58 -0500 Subject: [PATCH 384/899] drm/amdgpu/userq: add UAPI for setting up secure queues If a queue needs to access TMZ surfaces, it must be set up as secure.
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 267ed4adcfb99..284ac25ab5c48 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -337,6 +337,8 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) /* * This structure is a container to pass input configuration -- GitLab From cb808ab833d1a9bbca0c7ed27e5a31c5c2c9f8cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 20:58:42 -0500 Subject: [PATCH 385/899] drm/amdgpu: add tmz queue parameter to mqd props Use this to track whether we want TMZ for queues. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 68410ba1b9446..decf66c2a7187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -835,6 +835,7 @@ struct amdgpu_mqd_prop { uint64_t gds_bkup_addr; uint64_t csa_addr; uint64_t fence_address; + bool tmz_queue; }; struct amdgpu_mqd { -- GitLab From 3940796a6eefa555fec688a4adee5659ef9fa431 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Apr 2025 10:23:15 -0400 Subject: [PATCH 386/899] drm/amdgpu: Use allowed_domains for pinning dmabufs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When determining the domains for pinning DMABufs, filter allowed_domains and fail with a warning if VRAM is forbidden and GTT is not an allowed domain.
Fixes: f5e7fabd1f5c ("drm/amdgpu: allow pinning DMA-bufs into VRAM if all importers can do P2P") Suggested-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 667080cc9ae1c..0446586bd5a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -77,7 +77,7 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); - u32 domains = bo->preferred_domains; + u32 domains = bo->allowed_domains; dma_resv_assert_held(dmabuf->resv); @@ -93,6 +93,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (WARN_ON(!domains)) + return -EINVAL; + return amdgpu_bo_pin(bo, domains); } -- GitLab From 9486875408e775fb1c808744be761af2c0acfc46 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 21:08:56 -0500 Subject: [PATCH 387/899] drm/amdgpu/gfx11: add support for TMZ queues to mqd_init Set up TMZ for queues. 
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 70c38e53e6918..496e83cb89177 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4142,6 +4142,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4296,6 +4298,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ -- GitLab From eec64449233b58f44154ecb9417e53fceb1e93a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 21:13:02 -0500 Subject: [PATCH 388/899] drm/amdgpu/gfx12: add support for TMZ queues to mqd_init Set up TMZ for queues. 
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 1523f5b352c71..9cfe50016dab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3013,6 +3013,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -3164,6 +3166,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ -- GitLab From f53d0f48a89ccf13a1598985ab1782ec105cb049 Mon Sep 17 00:00:00 2001 From: Yiling Chen Date: Tue, 4 Mar 2025 16:52:16 +0800 Subject: [PATCH 389/899] drm/amd/display: To apply the adjusted DP ref clock for DP devices [Why] For some pixel clock margin sensitive external monitor, we could not keep original DP ref clock for the ASICs supported SSC DP ref clock. [How] From silicon design team's comment, we have to apply the adjusted DP ref clock for DP devices. DP 128b (DP2) signals use the DTBCLK not DP ref.
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Yiling Chen Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 077337698e0ad..b4f5b4a6331a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -976,11 +976,12 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; - // Apply ssed(spread spectrum) dpref clock for edp only. - if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 - && pix_clk_params->signal_type == SIGNAL_TYPE_EDP - && encoding == DP_8b_10b_ENCODING) + // Apply ssed(spread spectrum) dpref clock for edp and dp + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 && + dc_is_dp_signal(pix_clk_params->signal_type) && + encoding == DP_8b_10b_ENCODING) dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; + // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { -- GitLab From 724a4b400bfcce27b155863fc36fe7bef58f768e Mon Sep 17 00:00:00 2001 From: Chris Park Date: Mon, 31 Mar 2025 16:04:55 -0400 Subject: [PATCH 390/899] drm/amd/display: Implement HDMI Read Request [Why] Read Request provides an alternative method to polling for the HDMI sinks that support it. [How] Implement Read Request where an interrupt can be generated by the sink.
Reviewed-by: Joshua Aberback Signed-off-by: Chris Park Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c | 14 ++++++++++++++ drivers/gpu/drm/amd/display/dc/irq_types.h | 7 +++++++ drivers/gpu/drm/amd/display/dc/link/link_factory.c | 4 ++++ .../amd/display/include/gpio_service_interface.h | 3 +++ 6 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7624b909497ee..04b1c7b331a6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1436,6 +1436,7 @@ struct dc_scratch_space { enum signal_type connector_signal; enum dc_irq_source irq_source_hpd; enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ + enum dc_irq_source irq_source_read_request;/* Read Request */ bool is_hpd_filter_disabled; bool dp_ss_off; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9bfa9ac1b05f5..ab1adc836018e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -210,6 +210,7 @@ struct dc_edid_caps { bool edid_hdmi; bool hdr_supported; + bool rr_capable; struct dc_panel_patch panel_patch; }; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index b099989d9364f..942d9f0b6df25 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -411,6 +411,20 @@ enum dc_irq_source dal_irq_get_rx_source( } } +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq) +{ + enum gpio_id id = dal_gpio_get_id(irq); + + switch (id) { + case GPIO_ID_HPD: + return (enum dc_irq_source)(DC_IRQ_SOURCE_DCI2C_RR_DDC1 + + dal_gpio_get_enum(irq)); + default: + return 
DC_IRQ_SOURCE_INVALID; + } +} + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config) diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index eadab0a2afebe..a2f7b933bebf9 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -168,6 +168,13 @@ enum dc_irq_source { DC_IRQ_SOURCE_DC5_VLINE2, DC_IRQ_SOURCE_DC6_VLINE2, + DC_IRQ_SOURCE_DCI2C_RR_DDC1, + DC_IRQ_SOURCE_DCI2C_RR_DDC2, + DC_IRQ_SOURCE_DCI2C_RR_DDC3, + DC_IRQ_SOURCE_DCI2C_RR_DDC4, + DC_IRQ_SOURCE_DCI2C_RR_DDC5, + DC_IRQ_SOURCE_DCI2C_RR_DDC6, + DAL_IRQ_SOURCES_NUMBER }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 70f54bdbbc64e..0125f2cfc114a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -464,6 +464,7 @@ static bool construct_phy(struct dc_link *link, link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID; + link->irq_source_read_request = DC_IRQ_SOURCE_INVALID; link->link_status.dpcd_caps = &link->dpcd_caps; link->dc = init_params->dc; @@ -514,6 +515,9 @@ static bool construct_phy(struct dc_link *link, case CONNECTOR_ID_HDMI_TYPE_A: link->connector_signal = SIGNAL_TYPE_HDMI_TYPE_A; + if (link->hpd_gpio) + link->irq_source_read_request = + dal_irq_get_read_request(link->hpd_gpio); break; case CONNECTOR_ID_SINGLE_LINK_DVID: case CONNECTOR_ID_SINGLE_LINK_DVII: diff --git a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h index 7e3240e73c1fc..63813009a3a68 100644 --- a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h +++ b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h @@ -86,6 +86,9 @@ enum dc_irq_source dal_irq_get_source( enum dc_irq_source dal_irq_get_rx_source( const struct 
gpio *irq); +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq); + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config); -- GitLab From 7e40f64896e8e3dca471e287672db5ace12ea0be Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 21 Jan 2025 16:03:52 -0600 Subject: [PATCH 391/899] drm/amd/display: Avoid divide by zero by initializing dummy pitch to 1 [Why] If the dummy values in `populate_dummy_dml_surface_cfg()` aren't updated then they can lead to a divide by zero in downstream callers like CalculateVMAndRowBytes() [How] Initialize dummy value to a value to avoid divide by zero. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1b..857ce1695fd55 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -896,7 +896,7 @@ static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsig out->SurfaceWidthC[location] = in->timing.h_addressable; out->SurfaceHeightC[location] = in->timing.v_addressable; out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; - out->PitchC[location] = 0; + out->PitchC[location] = 1; out->DCCEnable[location] = false; out->DCCMetaPitchY[location] = 0; out->DCCMetaPitchC[location] = 0; -- GitLab From 6df71752632e39ffa189092b9ca16dddc6bbea7b Mon Sep 17 00:00:00 2001 From: Jack Chang Date: Thu, 19 Dec 2024 16:10:18 +0800 Subject: [PATCH 392/899] drm/amd/display: Move desync error counter operation up. 
[Why & How] Move desync error counter operation up to prevent it from being skipped by force disable desync error. Reviewed-by: Robin Chen Reviewed-by: Aric Cyr Signed-off-by: Jack Chang Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_dp_irq_handler.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 5be00e4ce10b3..991b8ad4984b6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -229,6 +229,10 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw; + /* Increment desync error counter if a desync error is detected */ + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.replay_desync_error_fail_count++; + if (link->replay_settings.config.force_disable_desync_error_check) return; @@ -240,9 +244,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_configuration.raw, sizeof(replay_configuration.raw)); - /* Update desync error counter */ - link->replay_settings.replay_desync_error_fail_count++; - /* Acknowledge and clear error bits */ dm_helpers_dp_write_dpcd( link->ctx, -- GitLab From 05185812ae3695fe049c14847ce3cbeccff1bf2e Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Apr 2025 23:58:28 -0400 Subject: [PATCH 393/899] drm/amdgpu: Don't pin VRAM without DMABUF_MOVE_NOTIFY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pinning of VRAM is for peer devices that don't support dynamic attachment and move notifiers. But it requires that all such peer devices are able to access VRAM via PCIe P2P. 
Any device without P2P access requires migration to GTT, which fails if the memory is already pinned for another peer device. Sharing between GPUs should not require pinning in VRAM. However, if DMABUF_MOVE_NOTIFY is disabled in the kernel build, even DMABufs shared between GPUs must be pinned, which can lead to failures and functional regressions on systems where some peer GPUs are not P2P accessible. Disable VRAM pinning if move notifiers are disabled in the kernel build to fix regressions when sharing BOs between GPUs. Signed-off-by: Felix Kuehling Tested-by: Hao (Claire) Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 0446586bd5a70..5740e8d1a5226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -81,14 +81,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) dma_resv_assert_held(dmabuf->resv); - /* - * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP * support if all attachments can do P2P. If any attachment can't do * P2P just pin into GTT instead. + * + * To avoid with conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notiers are enabled. 
*/ - list_for_each_entry(attach, &dmabuf->attachments, node) - if (!attach->peer2peer) - domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; -- GitLab From 8fc3959cd4da33d703ceb1166830407b673d8a78 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 1 Apr 2025 14:38:31 -0400 Subject: [PATCH 394/899] drm/amd/display: Move Mode Support Prefetch Checks To Its Own Function [Why] Large stack size observed in DCN4 mode support when compiling with clang. Additional instrumentation added by compiler adds to stack size. dml_core_mode_support ends up going over the stack size limit due to the size of the function. [How] Move checks and calculations for prefetch to its own function. Reviewed-by: Aurabindo Pillai Signed-off-by: Austin Zheng Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 1277 +++++++++-------- 1 file changed, 642 insertions(+), 635 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index a27409464616c..6f71b72213849 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -7264,6 +7264,647 @@ static void calculate_pstate_keepout_dst_lines( } } +static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib, + const struct dml2_display_cfg *display_cfg) +{ + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct 
dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + + double min_return_bw_for_latency; + unsigned int k; + + mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; 
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + 
CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { + mode_lib->ms.support.PrefetchSupported = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + 
display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0); + + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = 
display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + 
mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + 
CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + 
CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = 
&s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, 
+ mode_lib->soc.return_bus_width_bytes, + mode_lib->ms.RequiredDISPCLK, + s->tdlut_bytes_to_deliver, + s->prefetch_swath_time_us, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, 
mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + s->line_times[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = 
mode_lib->ms.support.urg_bandwidth_required_qual; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 0; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + 
calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= 
mode_lib->ms.support.PrefetchBandwidthSupported; + DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + 
CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); + + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + 
mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + 
&mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = 
mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + 
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; + s->mSOCParameters.SMNLatency = 0; + s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + 
+ CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + 
CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", 
__func__); + +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -7271,19 +7912,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; double outstanding_latency_us = 0; - double min_return_bw_for_latency; struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; -#ifdef DML_GLOBAL_PREFETCH_CHECK - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; -#endif - struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; - struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = 
&mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; unsigned int k, m, n; @@ -8806,633 +9439,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } - /* Prefetch Check */ - { - mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - - calculate_hostvm_inefficiency_factor( - &s->HostVMInefficiencyFactor, - &s->HostVMInefficiencyFactorPrefetch, - - display_cfg->gpuvm_enable, - display_cfg->hostvm_enable, - mode_lib->ip.remote_iommu_outstanding_translations, - mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], - mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); - - mode_lib->ms.Total3dlutActive = 0; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) - mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; - - // Calculate tdlut schedule related terms - calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; - calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; - calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; - calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; - calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; - calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; - calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); - - // 
output - calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; - calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; - calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; - calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; - - calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); - } - - min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); - - CalculateExtraLatency( - display_cfg, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, - s->ReorderingBytes, - mode_lib->ms.DCFCLK, - mode_lib->ms.FabricClock, - mode_lib->ip.pixel_chunk_size_kbytes, - min_return_bw_for_latency, - mode_lib->ms.num_active_planes, - mode_lib->ms.NoOfDPP, - mode_lib->ms.dpte_group_bytes, - s->tdlut_bytes_per_group, - s->HostVMInefficiencyFactor, - s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, - mode_lib->soc.qos_parameters.qos_type, - !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), - 
mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.request_size_bytes_luma, - mode_lib->ms.support.request_size_bytes_chroma, - mode_lib->ip.meta_chunk_size_kbytes, - mode_lib->ip.dchub_arb_to_ret_delay, - mode_lib->ms.TripToMemory, - mode_lib->ip.hostvm_mode, - - // output - &mode_lib->ms.ExtraLatency, - &mode_lib->ms.ExtraLatency_sr, - &mode_lib->ms.ExtraLatencyPrefetch); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - s->impacted_dst_y_pre[k] = 0; - - s->recalc_prefetch_schedule = 0; - s->recalc_prefetch_done = 0; - do { - mode_lib->ms.support.PrefetchSupported = true; - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; - - s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane0.width, - display_cfg->plane_descriptors[k].composition.viewport.plane0.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane1.width, - display_cfg->plane_descriptors[k].composition.viewport.plane1.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - - mode_lib->ms.TWait[k] = CalculateTWait( - 
display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? - get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); - - myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; - myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; - myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; - myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; - myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; - myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; - myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; - myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; - myPipe->VBlank = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; - myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; - myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; - myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; - myPipe->ODMMode = mode_lib->ms.ODMMode[k]; - myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; - myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); - DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); -#endif - CalculatePrefetchSchedule_params->display_cfg = display_cfg; - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = 
mode_lib->ms.swath_width_luma_ub[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; - CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; - CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; - CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; - CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); - CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; - CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; - CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; - CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; - CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; - CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; - CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; - - // output - CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; - 
CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; - CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; - CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c - CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; - CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; - CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; - CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; - CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; - CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; - 
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; - CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; - CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; - CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; - CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; - CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; - - mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); - - mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; - DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); - } // for k num_planes - - CalculateDCFCLKDeepSleepTdlut( - display_cfg, - mode_lib->ms.num_active_planes, - mode_lib->ms.BytePerPixelY, - mode_lib->ms.BytePerPixelC, - mode_lib->ms.SwathWidthY, - mode_lib->ms.SwathWidthC, - mode_lib->ms.NoOfDPP, - mode_lib->ms.PSCL_FACTOR, - mode_lib->ms.PSCL_FACTOR_CHROMA, - mode_lib->ms.RequiredDPPCLK, - mode_lib->ms.vactive_sw_bw_l, - mode_lib->ms.vactive_sw_bw_c, - mode_lib->soc.return_bus_width_bytes, - mode_lib->ms.RequiredDISPCLK, - s->tdlut_bytes_to_deliver, - s->prefetch_swath_time_us, - - /* Output */ - &mode_lib->ms.dcfclk_deepsleep); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.dst_y_prefetch[k] < 2.0 - || mode_lib->ms.LinesForVM[k] >= 32.0 - || mode_lib->ms.LinesForDPTERow[k] >= 16.0 - || mode_lib->ms.NoTimeForPrefetch[k] == true - || s->DSTYAfterScaler[k] > 8) { - mode_lib->ms.support.PrefetchSupported = false; - DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 
2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); - } - } - - mode_lib->ms.support.DynamicMetadataSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { - mode_lib->ms.support.DynamicMetadataSupported = false; - } - } - - mode_lib->ms.support.VRatioInPrefetchSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { - mode_lib->ms.support.VRatioInPrefetchSupported = false; - DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); - } - } - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; - - // By default, do not recalc prefetch schedule - s->recalc_prefetch_schedule = 0; - - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok - if (mode_lib->ms.support.PrefetchSupported) { - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - // Calculate Urgent burst factor for prefetch -#ifdef __DML_VBA_DEBUG__ - 
DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); - DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); -#endif - CalculateUrgentBurstFactor( - &display_cfg->plane_descriptors[k], - mode_lib->ms.swath_width_luma_ub[k], - mode_lib->ms.swath_width_chroma_ub[k], - mode_lib->ms.SwathHeightY[k], - mode_lib->ms.SwathHeightC[k], - s->line_times[k], - mode_lib->ms.UrgLatency, - mode_lib->ms.VRatioPreY[k], - mode_lib->ms.VRatioPreC[k], - mode_lib->ms.BytePerPixelInDETY[k], - mode_lib->ms.BytePerPixelInDETC[k], - mode_lib->ms.DETBufferSizeY[k], - mode_lib->ms.DETBufferSizeC[k], - /* Output */ - &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChromaPre[k], - &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - - // Calculate urgent bandwidth required, both urg and non urg peak bandwidth - // assume flip bw is 0 at this point - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - mode_lib->ms.final_flip_bw[k] = 0; - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 0; - calculate_peak_bandwidth_params->num_active_planes = 
mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - 
calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - // Check urg peak bandwidth against available urg bw - // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) - check_urgent_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth - &s->dummy_single[1], // double* frac_urg_bandwidth_mall - &mode_lib->ms.support.UrgVactiveBandwidthSupport, - &mode_lib->ms.support.PrefetchBandwidthSupported, - - mode_lib->soc.mall_allocated_for_dcn_mbytes, - mode_lib->ms.support.non_urg_bandwidth_required, - mode_lib->ms.support.urg_vactive_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; - DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { - mode_lib->ms.support.PrefetchSupported = false; - DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - } - -#ifdef DML_GLOBAL_PREFETCH_CHECK - if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 
&& s->recalc_prefetch_done == 0) { - CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; - CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; - CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; - CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; - CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; - CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; - CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; - CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; - CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; - if 
(CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; - - CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / - ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); - - // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible - CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; - CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; - mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); - s->recalc_prefetch_done = 1; - s->recalc_prefetch_schedule = 1; - } -#endif - } // prefetch schedule ok, do urg bw and flip schedule - } while (s->recalc_prefetch_schedule); - - // Flip Schedule - // Both prefetch schedule and BW okay - if (mode_lib->ms.support.PrefetchSupported == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_qual, // no flip - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( - s->HostVMInefficiencyFactor, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; - - } - - for (k = 0; k 
< mode_lib->ms.num_active_planes; k++) { - CalculateFlipSchedule( - &mode_lib->scratch, - display_cfg->plane_descriptors[k].immediate_flip, - 1, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ms.Tno_bw_flip[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->ms.meta_row_bytes[k], - s->meta_row_height_luma[k], - s->meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, - - /* Output */ - &mode_lib->ms.dst_y_per_vm_flip[k], - &mode_lib->ms.dst_y_per_row_flip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = 
s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - 
calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth_flip - &mode_lib->ms.support.ImmediateFlipSupport, - - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_flip, - mode_lib->ms.support.non_urg_bandwidth_required_flip, - mode_lib->ms.support.urg_bandwidth_available); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - } else { // if prefetch not support, assume iflip is not supported too - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; - s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; - s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; - 
s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; - s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; - s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; - s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; - s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; - s->mSOCParameters.USRRetrainingLatency = 0; - s->mSOCParameters.SMNLatency = 0; - s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); - s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; - s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; - - CalculateWatermarks_params->display_cfg = display_cfg; - CalculateWatermarks_params->USRRetrainingRequired = false; - CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; - 
CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; - CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; - CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; - CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; - CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; - CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; - CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; - CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; - - // Output - CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; - 
CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; - CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; - CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; - CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; - CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - - calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); - } - DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); - // End of Prefetch Check + dml_core_ms_prefetch_check(mode_lib, display_cfg); mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; -- GitLab From d91bc901398741d317d9b55c59ca949d4bc7394b Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 1 Apr 2025 17:05:10 -0400 Subject: [PATCH 395/899] drm/amd/display: Fix gpu reset in multidisplay config [Why] The indexing of stream_status in dm_gpureset_commit_state() is incorrect. That leads to asserts in multi-display configuration after gpu reset. 
[How] Adjust the indexing logic to align stream_status with surface_updates. Fixes: cdaae8371aa9 ("drm/amd/display: Handle GPU reset for DC block") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3808 Reviewed-by: Aurabindo Pillai Reviewed-by: Mario Limonciello Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3a2567fc5b86..371c3edbb02e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3411,16 +3411,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); -- GitLab From 2ba8619b9a378ad218ad6c2e2ccaee8f531e08de Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 26 Mar 2025 10:33:51 -0400 Subject: [PATCH 396/899] drm/amd/display: Force full update in gpu reset [Why] While the system is undergoing a GPU reset, always do a full update to sync the DC state before and after the reset.
[How] Return true in should_reset_plane() if a GPU reset is detected. Reviewed-by: Aurabindo Pillai Reviewed-by: Mario Limonciello Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 371c3edbb02e8..e700b1edac2c9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11131,6 +11131,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. */ if (old_plane_state->crtc != new_plane_state->crtc) return true; -- GitLab From c9646e5a7e01c3ede286ec5edd4fcb2e1e80261d Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Mon, 31 Mar 2025 15:59:51 -0400 Subject: [PATCH 397/899] drm/amd/display: DCN32 null data check [WHY & HOW] Avoid dereferencing a null curve data structure in the CM block, which is a potential issue.
Reviewed-by: Charlene Liu Signed-off-by: Yihan Zhu Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c | 380 +++++++++--------- 1 file changed, 192 insertions(+), 188 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c index a0e9e9f0441a4..b4cea2b8cb2a8 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c @@ -370,275 +370,279 @@ void mpc32_program_shaper_luta_settings( MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - 
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, 
curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - 
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); -} - - -void mpc32_program_shaper_lutb_settings( - struct mpc *mpc, - const struct pwl_params *params, - uint32_t mpcc_id) -{ - const struct gamma_curve *curve; - struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); - - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x, - 
MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); - - curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, - 
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, 
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, + curve += 2; + 
REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + } +} + + +void mpc32_program_shaper_lutb_settings( + struct mpc *mpc, + const struct pwl_params *params, + uint32_t mpcc_id) +{ + const struct gamma_curve *curve; + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0, + 
MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); + + curve = params->arr_curve_points; + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + 
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + + curve += 2; + 
REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + 
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + } } -- GitLab From 652968d996d70105423f82536e137a07e1d947c1 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Tue, 1 Apr 2025 10:28:36 -0400 Subject: [PATCH 398/899] drm/amd/display: DCN42 RMCM and MCM 3DLUT support [WHY & HOW] Providing hardware programming for the RMCM and MCM IPs for 3DLUT in DCN42. 
Reviewed-by: Charlene Liu Signed-off-by: Yihan Zhu Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 9 +- .../dc/dml2/dml21/dml21_translation_helper.c | 3 + .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 351 ++++++++++++++++-- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 8 + drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 29 ++ 5 files changed, 363 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index ab1adc836018e..a4cd0eb39a3af 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1255,6 +1255,7 @@ enum dc_cm2_gpu_mem_layout { enum dc_cm2_gpu_mem_pixel_component_order { DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA, + DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA }; enum dc_cm2_gpu_mem_format { @@ -1276,7 +1277,8 @@ struct dc_cm2_gpu_mem_format_parameters { enum dc_cm2_gpu_mem_size { DC_CM2_GPU_MEM_SIZE_171717, - DC_CM2_GPU_MEM_SIZE_TRANSFORMED + DC_CM2_GPU_MEM_SIZE_333333, + DC_CM2_GPU_MEM_SIZE_TRANSFORMED, }; struct dc_cm2_gpu_mem_parameters { @@ -1285,6 +1287,7 @@ struct dc_cm2_gpu_mem_parameters { struct dc_cm2_gpu_mem_format_parameters format_params; enum dc_cm2_gpu_mem_pixel_component_order component_order; enum dc_cm2_gpu_mem_size size; + uint16_t bit_depth; }; enum dc_cm2_transfer_func_source { @@ -1308,6 +1311,10 @@ struct dc_cm2_func_luts { const struct dc_3dlut *lut3d_func; struct dc_cm2_gpu_mem_parameters gpu_mem_params; }; + bool rmcm_3dlut_shaper_select; + bool mpc_3dlut_enable; + bool rmcm_3dlut_enable; + bool mpc_mcm_post_blend; } lut3d_data; const struct dc_transfer_func *lut1d_func; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea6726..f721aabdd470a 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -885,6 +885,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; break; + case DC_CM2_GPU_MEM_SIZE_333333: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_33_cube; + break; case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f64..423c0d28218b9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -396,6 +396,249 @@ static void dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ct } } +static void dcn401_set_mcm_location_post_blend(struct dc *dc, struct pipe_ctx *pipe_ctx, bool bPostBlend) +{ + struct mpc *mpc = dc->res_pool->mpc; + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + + if (!pipe_ctx->plane_state) + return; + + mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); + pipe_ctx->plane_state->mcm_location = (bPostBlend) ? 
+ MPCC_MOVABLE_CM_LOCATION_AFTER : + MPCC_MOVABLE_CM_LOCATION_BEFORE; +} + +static void dc_get_lut_mode( + enum dc_cm2_gpu_mem_layout layout, + enum hubp_3dlut_fl_mode *mode, + enum hubp_3dlut_fl_addressing_mode *addr_mode) +{ + switch (layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + *mode = hubp_3dlut_fl_mode_native_1; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + *mode = hubp_3dlut_fl_mode_native_2; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + *mode = hubp_3dlut_fl_mode_transform; + *addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear; + break; + default: + *mode = hubp_3dlut_fl_mode_disable; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + } +} + +static void dc_get_lut_format( + enum dc_cm2_gpu_mem_format dc_format, + enum hubp_3dlut_fl_format *format) +{ + switch (dc_format) { + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: + *format = hubp_3dlut_fl_format_unorm_12msb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: + *format = hubp_3dlut_fl_format_unorm_12lsb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10: + *format = hubp_3dlut_fl_format_float_fp1_5_10; + break; + } +} + +static void dc_get_lut_xbar( + enum dc_cm2_gpu_mem_pixel_component_order order, + enum hubp_3dlut_fl_crossbar_bit_slice *cr_r, + enum hubp_3dlut_fl_crossbar_bit_slice *y_g, + enum hubp_3dlut_fl_crossbar_bit_slice *cb_b) +{ + switch (order) { + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_32_47; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_0_15; + break; + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; + break; + } +} + +static 
void dc_get_lut_width( + enum dc_cm2_gpu_mem_size size, + enum hubp_3dlut_fl_width *width) +{ + switch (size) { + case DC_CM2_GPU_MEM_SIZE_333333: + *width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + *width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + *width = hubp_3dlut_fl_width_transformed; + break; + } +} +static bool dc_is_rmcm_3dlut_supported(struct hubp *hubp, struct mpc *mpc) +{ + if (mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.program_lut_read_write_control && + hubp->funcs->hubp_program_3dlut_fl_addr && + mpc->funcs->rmcm.program_bit_depth && + hubp->funcs->hubp_program_3dlut_fl_mode && + hubp->funcs->hubp_program_3dlut_fl_addressing_mode && + hubp->funcs->hubp_program_3dlut_fl_format && + hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->rmcm.program_bias_scale && + hubp->funcs->hubp_program_3dlut_fl_crossbar && + hubp->funcs->hubp_program_3dlut_fl_width && + mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.populate_lut && + mpc->funcs->rmcm.program_lut_mode && + hubp->funcs->hubp_enable_3dlut_fl && + mpc->funcs->rmcm.enable_3dlut_fl) + return true; + + return false; +} + +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id) +{ + struct dpp *dpp_base = pipe_ctx->plane_res.dpp; + union mcm_lut_params m_lut_params; + enum MCM_LUT_XABLE shaper_xable, lut3d_xable = MCM_LUT_DISABLE, lut1d_xable; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; + enum hubp_3dlut_fl_width width; + struct dc *dc = hubp->ctx->dc; + + bool 
bypass_rmcm_3dlut = false; + bool bypass_rmcm_shaper = false; + + dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + + /* 3DLUT */ + switch (lut3d_src) { + case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + memset(&m_lut_params, 0, sizeof(m_lut_params)); + // Don't know what to do in this case. + //case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + break; + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + dc_get_lut_width(mcm_luts->lut3d_data.gpu_mem_params.size, &width); + if (!dc_is_rmcm_3dlut_supported(hubp, mpc) || + !mpc->funcs->rmcm.is_config_supported(width)) + return false; + + //0. disable fl on mpc + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, 0xF); + + //1. power down the block + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, false); + + //2. program RMCM + //2a. 3dlut reg programming + mpc->funcs->rmcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, + (!bypass_rmcm_3dlut) && lut3d_xable != MCM_LUT_DISABLE, mpcc_id); + + hubp->funcs->hubp_program_3dlut_fl_addr(hubp, + mcm_luts->lut3d_data.gpu_mem_params.addr); + + mpc->funcs->rmcm.program_bit_depth(mpc, + mcm_luts->lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + + // setting native or transformed mode, + dc_get_lut_mode(mcm_luts->lut3d_data.gpu_mem_params.layout, &mode, &addr_mode); + + //these program the mcm 3dlut + hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode); + + hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode); + + //seems to be only for the MCM + dc_get_lut_format(mcm_luts->lut3d_data.gpu_mem_params.format_params.format, &format); + hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); + + mpc->funcs->rmcm.program_bias_scale(mpc, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); + hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + 
mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale); + + dc_get_lut_xbar( + mcm_luts->lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + + hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, + crossbar_bit_slice_y_g, + crossbar_bit_slice_cb_b); + + mpc->funcs->rmcm.program_3dlut_size(mpc, width, mpcc_id); + + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); + + //2b. shaper reg programming + memset(&m_lut_params, 0, sizeof(m_lut_params)); + + if (mcm_luts->shaper->type == TF_TYPE_HWPWL) { + m_lut_params.pwl = &mcm_luts->shaper->pwl; + } else if (mcm_luts->shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { + ASSERT(false); + cm_helper_translate_curve_to_hw_format( + dc->ctx, + mcm_luts->shaper, + &dpp_base->regamma_params, true); + m_lut_params.pwl = &dpp_base->regamma_params; + } + if (m_lut_params.pwl) { + mpc->funcs->rmcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + mpc->funcs->rmcm.program_lut_mode(mpc, !bypass_rmcm_shaper, lut_bank_a, mpcc_id); + } else { + //RMCM 3dlut won't work without its shaper + return false; + } + + //3. Select the hubp connected to this RMCM + hubp->funcs->hubp_enable_3dlut_fl(hubp, true); + mpc->funcs->rmcm.enable_3dlut_fl(mpc, true, mpcc_id); + + //4. 
power on the block + if (m_lut_params.pwl) + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, true); + + break; + default: + return false; + } + + return true; +} + void dcn401_populate_mcm_luts(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_cm2_func_luts mcm_luts, @@ -407,9 +650,9 @@ void dcn401_populate_mcm_luts(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; union mcm_lut_params m_lut_params; enum dc_cm2_transfer_func_source lut3d_src = mcm_luts.lut3d_data.lut3d_src; - enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_format format = 0; enum hubp_3dlut_fl_mode mode; - enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_width width = 0; enum hubp_3dlut_fl_addressing_mode addr_mode; enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; @@ -417,11 +660,29 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE shaper_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; - bool is_17x17x17 = true; bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + //MCM - setting its location (Before/After) blender + //set to post blend (true) + dcn401_set_mcm_location_post_blend( + dc, + pipe_ctx, + mcm_luts.lut3d_data.mpc_mcm_post_blend); + + //RMCM - 3dLUT+Shaper + if (mcm_luts.lut3d_data.rmcm_3dlut_enable) { + dcn401_program_rmcm_luts( + hubp, + pipe_ctx, + lut3d_src, + &mcm_luts, + mpc, + lut_bank_a, + mpcc_id); + } + /* 1D LUT */ if (mcm_luts.lut1d_func) { memset(&m_lut_params, 0, sizeof(m_lut_params)); @@ -442,7 +703,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, } /* Shaper */ - if (mcm_luts.shaper) { + if (mcm_luts.shaper && mcm_luts.lut3d_data.mpc_3dlut_enable) { memset(&m_lut_params, 0, sizeof(m_lut_params)); if (mcm_luts.shaper->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.shaper->pwl; @@ -454,11 +715,10 @@ void 
dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { - if (mpc->funcs->populate_lut) - mpc->funcs->populate_lut(mpc, MCM_LUT_SHAPER, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->mcm.populate_lut) + mpc->funcs->mcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_ENABLE, lut_bank_a, mpcc_id); } - if (mpc->funcs->program_lut_mode) - mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, shaper_xable, lut_bank_a, mpcc_id); } /* 3DLUT */ @@ -467,6 +727,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, memset(&m_lut_params, 0, sizeof(m_lut_params)); if (hubp->funcs->hubp_enable_3dlut_fl) hubp->funcs->hubp_enable_3dlut_fl(hubp, false); + if (mcm_luts.lut3d_data.lut3d_func && mcm_luts.lut3d_data.lut3d_func->state.bits.initialized) { m_lut_params.lut3d = &mcm_luts.lut3d_data.lut3d_func->lut_3d; if (mpc->funcs->populate_lut) @@ -476,16 +737,35 @@ void dcn401_populate_mcm_luts(struct dc *dc, mpcc_id); } break; - case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + switch (mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_333333: + width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + width = hubp_3dlut_fl_width_transformed; + break; + } + + //check for support + if (mpc->funcs->mcm.is_config_supported && + !mpc->funcs->mcm.is_config_supported(width)) + break; if (mpc->funcs->program_lut_read_write_control) mpc->funcs->program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, mpcc_id); if (mpc->funcs->program_lut_mode) mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, mpcc_id); - if (mpc->funcs->program_3dlut_size) - mpc->funcs->program_3dlut_size(mpc, is_17x17x17, mpcc_id); + if (hubp->funcs->hubp_program_3dlut_fl_addr) 
hubp->funcs->hubp_program_3dlut_fl_addr(hubp, mcm_luts.lut3d_data.gpu_mem_params.addr); + + if (mpc->funcs->mcm.program_bit_depth) + mpc->funcs->mcm.program_bit_depth(mpc, mcm_luts.lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + switch (mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: mode = hubp_3dlut_fl_mode_native_1; @@ -512,7 +792,6 @@ void dcn401_populate_mcm_luts(struct dc *dc, switch (mcm_luts.lut3d_data.gpu_mem_params.format_params.format) { case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: - default: format = hubp_3dlut_fl_format_unorm_12msb_bitslice; break; case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: @@ -524,37 +803,37 @@ void dcn401_populate_mcm_luts(struct dc *dc, } if (hubp->funcs->hubp_program_3dlut_fl_format) hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); - if (hubp->funcs->hubp_update_3dlut_fl_bias_scale) + if (hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->mcm.program_bias_scale) { + mpc->funcs->mcm.program_bias_scale(mpc, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); - - switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) { - case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: - default: - crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; - crossbar_bit_slice_y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; - crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; - break; + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); } + //navi 4x has a bug and r and blue are swapped and need to be worked around here in + //TODO: need to make a method for get_xbar per asic 
OR do the workaround in program_crossbar for 4x + dc_get_lut_xbar( + mcm_luts.lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + if (hubp->funcs->hubp_program_3dlut_fl_crossbar) hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, crossbar_bit_slice_y_g, - crossbar_bit_slice_cb_b, - crossbar_bit_slice_cr_r); + crossbar_bit_slice_cb_b); + + if (mpc->funcs->mcm.program_lut_read_write_control) + mpc->funcs->mcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, true, mpcc_id); + + if (mpc->funcs->mcm.program_3dlut_size) + mpc->funcs->mcm.program_3dlut_size(mpc, width, mpcc_id); - switch (mcm_luts.lut3d_data.gpu_mem_params.size) { - case DC_CM2_GPU_MEM_SIZE_171717: - default: - width = hubp_3dlut_fl_width_17; - break; - case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: - width = hubp_3dlut_fl_width_transformed; - break; - } - if (hubp->funcs->hubp_program_3dlut_fl_width) - hubp->funcs->hubp_program_3dlut_fl_width(hubp, width); if (mpc->funcs->update_3dlut_fast_load_select) mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 781cf0efccc6c..ce65b4f6c6727 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,4 +109,12 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index eaef3899da7bd..6e303b81bfb0f 100644 --- 
a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -1037,6 +1037,35 @@ struct mpc_funcs { * void */ void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); + + struct { + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } mcm; + + struct { + void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id); + void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_XABLE xable, + bool lut_bank_a, int mpcc_id); + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + + void (*power_on_shaper_3dlut)(struct mpc *mpc, uint32_t mpcc_id, bool power_on); + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } rmcm; }; #endif -- GitLab From cd74ce1f0cddffb3f36d0995d0f61e89f0010738 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Wed, 2 Apr 2025 15:04:08 -0400 Subject: [PATCH 399/899] drm/amd/display: Enable urgent latency adjustment on DCN35 [Why] Urgent latency adjustment was disabled on DCN35 due to issues with P0 
enablement on some platforms. Without urgent latency, underflows occur when doing certain high timing configurations. After testing, we found that reenabling urgent latency didn't reintroduce p0 support on multiple platforms. [How] Reenable urgent latency adjustment on DCN35 and set its fabric clock reference to 3000 MHz. This reverts commit 3412860cc4c0c484f53f91b371483e6e4440c3e5. Reviewed-by: Charlene Liu Signed-off-by: Nicholas Susanto Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 92f0a099d089a..d9159ca554124 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 0, + .do_urgent_latency_adjustment = 1, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) -- GitLab From e15d09f510d0303b53e556a73fa4236744c19695 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 3 Apr 2025 13:49:03 -0400 Subject: [PATCH 400/899] drm/amd/display: enable phy-ssc reduction by default [Why] Reduction of phy-ssc is needed to support DP2 high pixel clock on dcn35x/36. There's a special flag to enable it in dmub hw params. [How] Set hbr3_phy_ssc to true for dcn35, dcn351 and dcn36.
Reviewed-by: Charlene Liu Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e700b1edac2c9..aa42be3d63b0f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1330,6 +1330,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) case IP_VERSION(3, 5, 1): case IP_VERSION(3, 6, 0): hw_params.ips_sequential_ono = adev->external_rev_id > 0x10; + hw_params.lower_hbr3_phy_ssc = true; break; default: break; -- GitLab From 372c8d72c3680fdea3fbb2d6b089f76b4a6d596a Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 16 Apr 2025 00:19:13 -0400 Subject: [PATCH 401/899] drm/amdgpu: Allow P2P access through XGMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer memory is accessible through XGMI, allow leaving it in VRAM rather than forcing its migration to GTT on DMABuf attachment. Signed-off-by: Felix Kuehling Tested-by: Hao (Claire) Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 5740e8d1a5226..e6913fcf2c7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include #include +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. 
+ */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -480,6 +505,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; -- GitLab From 4e9b0ac17f5da2ac03090bf6b706970293c64d1c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 17 Apr 2025 12:10:36 +0300 Subject: [PATCH 402/899] drm/i915/display: pass struct intel_display to PCH macros Now that INTEL_PCH_TYPE() and HAS_PCH_*() macros are under display, and accept a struct intel_display pointer, use that instead of struct drm_i915_private pointer in display code. This is done naively by running: $ sed -i 's/\(INTEL_PCH_TYPE\|HAS_PCH_[A-Z0-9_-]*\)([^)]*)/\1(display)/g' \ $(find drivers/gpu/drm/i915/display -name "*.c") and fixing the fallout, i.e. removing unused local i915 variables and adding display variables where needed. 
v2: Rebase Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/999f4d7b8ed11739b1c5ec8d6408fc39d5e3776b.1744880985.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/g4x_dp.c | 26 ++++----- drivers/gpu/drm/i915/display/g4x_hdmi.c | 21 +++---- drivers/gpu/drm/i915/display/i9xx_wm.c | 4 +- drivers/gpu/drm/i915/display/intel_audio.c | 10 +--- .../gpu/drm/i915/display/intel_backlight.c | 37 +++++-------- drivers/gpu/drm/i915/display/intel_bios.c | 17 +++--- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++-- drivers/gpu/drm/i915/display/intel_crt.c | 31 ++++------- drivers/gpu/drm/i915/display/intel_ddi.c | 12 +--- drivers/gpu/drm/i915/display/intel_display.c | 21 ++----- .../drm/i915/display/intel_display_debugfs.c | 6 +- .../drm/i915/display/intel_display_device.c | 5 +- .../gpu/drm/i915/display/intel_display_irq.c | 33 +++++------ .../drm/i915/display/intel_display_power.c | 31 ++++------- drivers/gpu/drm/i915/display/intel_dp.c | 3 +- drivers/gpu/drm/i915/display/intel_dp_aux.c | 8 +-- drivers/gpu/drm/i915/display/intel_dpll.c | 11 +--- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +- drivers/gpu/drm/i915/display/intel_fdi.c | 13 ++--- .../drm/i915/display/intel_fifo_underrun.c | 4 +- drivers/gpu/drm/i915/display/intel_gmbus.c | 16 +++--- drivers/gpu/drm/i915/display/intel_hdmi.c | 28 ++++------ .../gpu/drm/i915/display/intel_hotplug_irq.c | 55 ++++++++----------- drivers/gpu/drm/i915/display/intel_lvds.c | 21 +++---- .../gpu/drm/i915/display/intel_pch_display.c | 41 +++++--------- .../gpu/drm/i915/display/intel_pch_refclk.c | 15 ++--- drivers/gpu/drm/i915/display/intel_pps.c | 26 +++------ drivers/gpu/drm/i915/display/intel_sdvo.c | 24 +++----- 28 files changed, 201 insertions(+), 336 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 18e51799d2a67..9a8c42d6aea90 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ 
b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -59,14 +59,13 @@ static void g4x_dp_set_clock(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct dpll *divisor = NULL; int i, count = 0; if (display->platform.g4x) { divisor = g4x_dpll; count = ARRAY_SIZE(g4x_dpll); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { divisor = pch_dpll; count = ARRAY_SIZE(pch_dpll); } else if (display->platform.cherryview) { @@ -92,7 +91,6 @@ static void intel_dp_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); @@ -140,7 +138,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_dp->DP |= DP_ENHANCED_FRAMING; intel_dp->DP |= DP_PIPE_SEL_IVB(crtc->pipe); - } else if (HAS_PCH_CPT(dev_priv) && port != PORT_A) { + } else if (HAS_PCH_CPT(display) && port != PORT_A) { intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT; intel_de_rmw(display, TRANS_DP_CTL(crtc->pipe), @@ -276,7 +274,6 @@ bool g4x_dp_port_enabled(struct intel_display *display, i915_reg_t dp_reg, enum port port, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); bool ret; u32 val; @@ -287,7 +284,7 @@ bool g4x_dp_port_enabled(struct intel_display *display, /* asserts want to know the pipe even if the port is disabled */ if (display->platform.ivybridge && port == PORT_A) *pipe = (val & DP_PIPE_SEL_MASK_IVB) >> DP_PIPE_SEL_SHIFT_IVB; - else if (HAS_PCH_CPT(dev_priv) && port != PORT_A) + else if (HAS_PCH_CPT(display) && port != PORT_A) ret &= cpt_dp_port_selected(display, port, pipe); else if 
(display->platform.cherryview) *pipe = (val & DP_PIPE_SEL_MASK_CHV) >> DP_PIPE_SEL_SHIFT_CHV; @@ -337,7 +334,6 @@ static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u32 tmp, flags = 0; enum port port = encoder->port; @@ -352,7 +348,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A; - if (HAS_PCH_CPT(dev_priv) && port != PORT_A) { + if (HAS_PCH_CPT(display) && port != PORT_A) { u32 trans_dp = intel_de_read(display, TRANS_DP_CTL(crtc->pipe)); @@ -415,7 +411,6 @@ intel_dp_link_down(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum port port = encoder->port; @@ -428,7 +423,7 @@ intel_dp_link_down(struct intel_encoder *encoder, drm_dbg_kms(display->drm, "\n"); if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { + (HAS_PCH_CPT(display) && port != PORT_A)) { intel_dp->DP &= ~DP_LINK_TRAIN_MASK_CPT; intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; } else { @@ -447,7 +442,7 @@ intel_dp_link_down(struct intel_encoder *encoder, * to transcoder A after disabling it to allow the * matching HDMI port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B && port != PORT_A) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B && port != PORT_A) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. 
@@ -1216,10 +1211,10 @@ static int g4x_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); int ret; - if (HAS_PCH_SPLIT(i915) && encoder->port != PORT_A) + if (HAS_PCH_SPLIT(display) && encoder->port != PORT_A) crtc_state->has_pch_encoder = true; ret = intel_dp_compute_config(encoder, crtc_state, conn_state); @@ -1272,7 +1267,6 @@ static const struct drm_encoder_funcs intel_dp_enc_funcs = { bool g4x_dp_init(struct intel_display *display, i915_reg_t output_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -1346,7 +1340,7 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->audio_disable = g4x_dp_audio_disable; if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) + (HAS_PCH_CPT(display) && port != PORT_A)) dig_port->dp.set_link_train = cpt_set_link_train; else dig_port->dp.set_link_train = g4x_set_link_train; @@ -1363,7 +1357,7 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->set_signal_levels = g4x_set_signal_levels; if (display->platform.valleyview || display->platform.cherryview || - (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) { + (HAS_PCH_SPLIT(display) && port != PORT_A)) { dig_port->dp.preemph_max = intel_dp_preemph_max_3; dig_port->dp.voltage_max = intel_dp_voltage_max_3; } else { diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 21b5db2fa203f..4e8b13e9ddd8f 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -27,7 +27,6 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { 
struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -36,7 +35,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); hdmi_val = SDVO_ENCODING_HDMI; - if (!HAS_PCH_SPLIT(dev_priv) && crtc_state->limited_color_range) + if (!HAS_PCH_SPLIT(display) && crtc_state->limited_color_range) hdmi_val |= HDMI_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) hdmi_val |= SDVO_VSYNC_ACTIVE_HIGH; @@ -51,7 +50,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, if (crtc_state->has_hdmi_sink) hdmi_val |= HDMI_MODE_SELECT_HDMI; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) hdmi_val |= SDVO_PIPE_SEL_CPT(crtc->pipe); else if (display->platform.cherryview) hdmi_val |= SDVO_PIPE_SEL_CHV(crtc->pipe); @@ -133,9 +132,8 @@ static int g4x_hdmi_compute_config(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; @@ -154,7 +152,6 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 tmp, flags = 0; int dotclock; @@ -185,7 +182,7 @@ static void intel_hdmi_get_config(struct intel_encoder 
*encoder, if (tmp & HDMI_AUDIO_ENABLE) pipe_config->has_audio = true; - if (!HAS_PCH_SPLIT(dev_priv) && + if (!HAS_PCH_SPLIT(display) && tmp & HDMI_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; @@ -382,7 +379,6 @@ static void intel_disable_hdmi(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct intel_digital_port *dig_port = hdmi_to_dig_port(intel_hdmi); @@ -400,7 +396,7 @@ static void intel_disable_hdmi(struct intel_atomic_state *state, * to transcoder A after disabling it to allow the * matching DP port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. @@ -674,7 +670,6 @@ static bool assert_hdmi_port_valid(struct intel_display *display, enum port port bool g4x_hdmi_init(struct intel_display *display, i915_reg_t hdmi_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -716,7 +711,7 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->hotplug = intel_hdmi_hotplug; intel_encoder->compute_config = g4x_hdmi_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { intel_encoder->disable = pch_disable_hdmi; intel_encoder->post_disable = pch_post_disable_hdmi; } else { @@ -737,9 +732,9 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->post_disable = vlv_hdmi_post_disable; } else { intel_encoder->pre_enable = intel_hdmi_pre_enable; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) intel_encoder->enable = cpt_enable_hdmi; - 
else if (HAS_PCH_IBX(dev_priv)) + else if (HAS_PCH_IBX(display)) intel_encoder->enable = ibx_enable_hdmi; else intel_encoder->enable = g4x_enable_hdmi; diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 40751f1547b76..77876ef735b74 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -4143,10 +4143,8 @@ static const struct intel_wm_funcs nop_funcs = { void i9xx_wm_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* For FIFO watermark updates */ - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { ilk_setup_wm_latency(display); display->funcs.wm = &ilk_wm_funcs; } else if (display->platform.valleyview || display->platform.cherryview) { diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ea935a5d94c87..ef07b54882619 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -587,19 +587,17 @@ static void ibx_audio_regs_init(struct intel_display *display, enum pipe pipe, struct ibx_audio_regs *regs) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.valleyview || display->platform.cherryview) { regs->hdmiw_hdmiedid = VLV_HDMIW_HDMIEDID(pipe); regs->aud_config = VLV_AUD_CFG(pipe); regs->aud_cntl_st = VLV_AUD_CNTL_ST(pipe); regs->aud_cntrl_st2 = VLV_AUD_CNTL_ST2; - } else if (HAS_PCH_CPT(i915)) { + } else if (HAS_PCH_CPT(display)) { regs->hdmiw_hdmiedid = CPT_HDMIW_HDMIEDID(pipe); regs->aud_config = CPT_AUD_CFG(pipe); regs->aud_cntl_st = CPT_AUD_CNTL_ST(pipe); regs->aud_cntrl_st2 = CPT_AUD_CNTRL_ST2; - } else if (HAS_PCH_IBX(i915)) { + } else if (HAS_PCH_IBX(display)) { regs->hdmiw_hdmiedid = IBX_HDMIW_HDMIEDID(pipe); regs->aud_config = IBX_AUD_CFG(pipe); regs->aud_cntl_st = IBX_AUD_CNTL_ST(pipe); @@ -889,12 +887,10 @@ static const struct intel_audio_funcs hsw_audio_funcs = { */ 
void intel_audio_hooks_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.g4x) display->funcs.audio = &g4x_audio_funcs; else if (display->platform.valleyview || display->platform.cherryview || - HAS_PCH_CPT(i915) || HAS_PCH_IBX(i915)) + HAS_PCH_CPT(display) || HAS_PCH_IBX(display)) display->funcs.audio = &ibx_audio_funcs; else if (display->platform.haswell || DISPLAY_VER(display) >= 8) display->funcs.audio = &hsw_audio_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 4f3fa966c5372..5470fee7a707e 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -473,7 +473,6 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 pch_ctl1, pch_ctl2; @@ -486,7 +485,7 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, intel_de_write(display, BLC_PWM_PCH_CTL1, pch_ctl1); } - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) intel_de_rmw(display, SOUTH_CHICKEN2, LPT_PWM_GRANULARITY, panel->backlight.alternate_pwm_increment ? LPT_PWM_GRANULARITY : 0); @@ -503,7 +502,7 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, pch_ctl1 |= BLM_PCH_POLARITY; /* After LPT, override is the default. 
*/ - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) pch_ctl1 |= BLM_PCH_OVERRIDE_ENABLE; intel_de_write(display, BLC_PWM_PCH_CTL1, pch_ctl1); @@ -1064,7 +1063,7 @@ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) */ static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; u32 mul, clock; @@ -1073,7 +1072,7 @@ static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) else mul = 128; - if (HAS_PCH_LPT_H(i915)) + if (HAS_PCH_LPT_H(display)) clock = MHz(135); /* LPT:H */ else clock = MHz(24); /* LPT:LP */ @@ -1230,12 +1229,11 @@ static u32 get_backlight_min_vbt(struct intel_connector *connector) static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unused) { struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 cpu_ctl2, pch_ctl1, pch_ctl2, val; bool alt, cpu_mode; - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) alt = intel_de_read(display, SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; else alt = intel_de_read(display, SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY; @@ -1259,7 +1257,7 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.pwm_enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; - cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(i915) && + cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(display) && !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) && (cpu_ctl2 & BLM_PWM_ENABLE); @@ -1466,15 +1464,13 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + 
if (INTEL_PCH_TYPE(display) >= PCH_MTL) return 2; - if (INTEL_PCH_TYPE(i915) >= PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_DG1) return 1; - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) return 2; return 1; @@ -1482,14 +1478,12 @@ static int cnp_num_backlight_controllers(struct intel_display *display) static bool cnp_backlight_controller_is_valid(struct intel_display *display, int controller) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (controller < 0 || controller >= cnp_num_backlight_controllers(display)) return false; if (controller == 1 && - INTEL_PCH_TYPE(i915) >= PCH_ICP && - INTEL_PCH_TYPE(i915) <= PCH_ADP) + INTEL_PCH_TYPE(display) >= PCH_ICP && + INTEL_PCH_TYPE(display) <= PCH_ADP) return intel_de_read(display, SOUTH_CHICKEN1) & ICP_SECOND_PPS_IO_SELECT; return true; @@ -1818,7 +1812,6 @@ void intel_backlight_init_funcs(struct intel_panel *panel) struct intel_connector *connector = container_of(panel, struct intel_connector, panel); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI && intel_dsi_dcs_init_backlight_funcs(connector) == 0) @@ -1826,14 +1819,14 @@ void intel_backlight_init_funcs(struct intel_panel *panel) if (display->platform.geminilake || display->platform.broxton) { panel->backlight.pwm_funcs = &bxt_pwm_funcs; - } else if (INTEL_PCH_TYPE(i915) >= PCH_CNP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_CNP) { panel->backlight.pwm_funcs = &cnp_pwm_funcs; - } else if (INTEL_PCH_TYPE(i915) >= PCH_LPT_H) { - if (HAS_PCH_LPT(i915)) + } else if (INTEL_PCH_TYPE(display) >= PCH_LPT_H) { + if (HAS_PCH_LPT(display)) panel->backlight.pwm_funcs = &lpt_pwm_funcs; else panel->backlight.pwm_funcs = &spt_pwm_funcs; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (HAS_PCH_SPLIT(display)) { panel->backlight.pwm_funcs = &pch_pwm_funcs; } else if (display->platform.valleyview 
|| display->platform.cherryview) { if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI) { diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 8fe854a452437..ba7b8938b17c3 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2245,28 +2245,27 @@ static const u8 adlp_ddc_pin_map[] = { static u8 map_ddc_pin(struct intel_display *display, u8 vbt_pin) { - struct drm_i915_private *i915 = to_i915(display->drm); const u8 *ddc_pin_map; int i, n_entries; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL || display->platform.alderlake_p) { + if (INTEL_PCH_TYPE(display) >= PCH_MTL || display->platform.alderlake_p) { ddc_pin_map = adlp_ddc_pin_map; n_entries = ARRAY_SIZE(adlp_ddc_pin_map); } else if (display->platform.alderlake_s) { ddc_pin_map = adls_ddc_pin_map; n_entries = ARRAY_SIZE(adls_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG1) { return vbt_pin; - } else if (display->platform.rocketlake && INTEL_PCH_TYPE(i915) == PCH_TGP) { + } else if (display->platform.rocketlake && INTEL_PCH_TYPE(display) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(display) == 9) { + } else if (HAS_PCH_TGP(display) && DISPLAY_VER(display) == 9) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_ICP) { ddc_pin_map = icp_ddc_pin_map; n_entries = ARRAY_SIZE(icp_ddc_pin_map); - } else if (HAS_PCH_CNP(i915)) { + } else if (HAS_PCH_CNP(display)) { ddc_pin_map = cnp_ddc_pin_map; n_entries = ARRAY_SIZE(cnp_ddc_pin_map); } else { @@ -2865,8 +2864,6 @@ parse_general_definitions(struct intel_display *display) static void init_vbt_defaults(struct intel_display *display) { - struct drm_i915_private *i915 = 
to_i915(display->drm); - display->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; /* general features */ @@ -2883,7 +2880,7 @@ init_vbt_defaults(struct intel_display *display) * clock for LVDS. */ display->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(display, - !HAS_PCH_SPLIT(i915)); + !HAS_PCH_SPLIT(display)); drm_dbg_kms(display->drm, "Set default to SSC at %d kHz\n", display->vbt.lvds_ssc_freq); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6830950aae3f2..b1718b491ffda 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3494,7 +3494,6 @@ static int dg1_rawclk(struct intel_display *display) static int cnp_rawclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int divider, fraction; u32 rawclk; @@ -3514,7 +3513,7 @@ static int cnp_rawclk(struct intel_display *display) rawclk |= CNP_RAWCLK_DEN(DIV_ROUND_CLOSEST(numerator * 1000, fraction) - 1); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) rawclk |= ICP_RAWCLK_NUM(numerator); } @@ -3553,21 +3552,20 @@ static int i9xx_hrawclk(struct intel_display *display) */ u32 intel_read_rawclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 freq; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) /* * MTL always uses a 38.4 MHz rawclk. The bspec tells us * "RAWCLK_FREQ defaults to the values for 38.4 and does * not need to be programmed." 
*/ freq = 38400; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) freq = dg1_rawclk(display); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + else if (INTEL_PCH_TYPE(display) >= PCH_CNP) freq = cnp_rawclk(display); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) freq = pch_rawclk(display); else if (display->platform.valleyview || display->platform.cherryview) freq = vlv_hrawclk(display); diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 8908b51dc3b93..415e594053180 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -91,13 +91,12 @@ static struct intel_crt *intel_attached_crt(struct intel_connector *connector) bool intel_crt_port_enabled(struct intel_display *display, i915_reg_t adpa_reg, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 val; val = intel_de_read(display, adpa_reg); /* asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK_CPT, val); else *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK, val); @@ -177,7 +176,6 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -194,14 +192,14 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, adpa |= ADPA_VSYNC_ACTIVE_HIGH; /* For CPT allow 3 pipe config, for others just use A or B */ - if (HAS_PCH_LPT(dev_priv)) + if (HAS_PCH_LPT(display)) ; /* Those bits don't exist here */ - else if (HAS_PCH_CPT(dev_priv)) + else if (HAS_PCH_CPT(display)) adpa |= 
ADPA_PIPE_SEL_CPT(crtc->pipe); else adpa |= ADPA_PIPE_SEL(crtc->pipe); - if (!HAS_PCH_SPLIT(dev_priv)) + if (!HAS_PCH_SPLIT(display)) intel_de_write(display, BCLRPAT(display, crtc->pipe), 0); switch (mode) { @@ -356,7 +354,6 @@ intel_crt_mode_valid(struct drm_connector *connector, const struct drm_display_mode *mode) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); int max_dotclk = display->cdclk.max_dotclk_freq; enum drm_mode_status status; int max_clock; @@ -368,7 +365,7 @@ intel_crt_mode_valid(struct drm_connector *connector, if (mode->clock < 25000) return MODE_CLOCK_LOW; - if (HAS_PCH_LPT(dev_priv)) + if (HAS_PCH_LPT(display)) max_clock = 180000; else if (display->platform.valleyview) /* @@ -387,7 +384,7 @@ intel_crt_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; /* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */ - if (HAS_PCH_LPT(dev_priv) && + if (HAS_PCH_LPT(display) && ilk_get_lanes_required(mode->clock, 270000, 24) > 2) return MODE_CLOCK_HIGH; @@ -438,7 +435,6 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -457,7 +453,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; /* LPT FDI RX only supports 8bpc. 
*/ - if (HAS_PCH_LPT(dev_priv)) { + if (HAS_PCH_LPT(display)) { /* TODO: Check crtc_state->max_link_bpp_x16 instead of bw_constrained */ if (crtc_state->bw_constrained && crtc_state->pipe_bpp < 24) { drm_dbg_kms(display->drm, @@ -482,13 +478,12 @@ static bool ilk_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 adpa; bool ret; /* The first time through, trigger an explicit detection cycle */ if (crt->force_hotplug_required) { - bool turn_off_dac = HAS_PCH_SPLIT(dev_priv); + bool turn_off_dac = HAS_PCH_SPLIT(display); u32 save_adpa; crt->force_hotplug_required = false; @@ -583,12 +578,11 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) static bool intel_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 stat; bool ret = false; int i, tries = 0; - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) return ilk_crt_detect_hotplug(connector); if (display->platform.valleyview) @@ -1011,14 +1005,13 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = { void intel_crt_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_connector *connector; struct intel_crt *crt; i915_reg_t adpa_reg; u8 ddc_pin; u32 adpa; - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) adpa_reg = PCH_ADPA; else if (display->platform.valleyview) adpa_reg = VLV_ADPA; @@ -1108,7 +1101,7 @@ void intel_crt_init(struct intel_display *display) intel_ddi_buf_trans_init(&crt->base); } else { - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { crt->base.compute_config = pch_crt_compute_config; crt->base.disable = pch_disable_crt; 
crt->base.post_disable = pch_post_disable_crt; @@ -1130,7 +1123,7 @@ void intel_crt_init(struct intel_display *display) * polarity and link reversal bits or not, instead of relying on the * BIOS. */ - if (HAS_PCH_LPT(dev_priv)) { + if (HAS_PCH_LPT(display)) { u32 fdi_config = FDI_RX_POLARITY_REVERSED_LPT | FDI_RX_LINK_REVERSAL_OVERRIDE; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b48ed5df7a967..99142b8cf5690 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4902,9 +4902,7 @@ static enum hpd_pin tgl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin rkl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return tgl_hpd_pin(display, port); if (port >= PORT_TC1) @@ -4923,12 +4921,10 @@ static enum hpd_pin icl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin ehl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (port == PORT_D) return HPD_PORT_A; - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return icl_hpd_pin(display, port); return HPD_PORT_A + port - PORT_A; @@ -4936,9 +4932,7 @@ static enum hpd_pin ehl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin skl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return icl_hpd_pin(display, port); return HPD_PORT_A + port - PORT_A; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 33c09999c42e0..9c966bbbfd316 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1509,7 +1509,6 @@ static void 
ilk_crtc_enable(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; if (drm_WARN_ON(display->drm, crtc->active)) @@ -1561,7 +1560,7 @@ static void ilk_crtc_enable(struct intel_atomic_state *state, intel_encoders_enable(state, crtc); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) intel_wait_for_pipe_scanline_moving(crtc); /* @@ -2533,15 +2532,13 @@ intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes, void intel_panel_sanitize_ssc(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* * There may be no VBT; and if the BIOS enabled SSC we can * just keep using it to avoid unnecessary flicker. Whereas if the * BIOS isn't using it, don't assume it will work even if the VBT * indicates as much. */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) { bool bios_lvds_use_ssc = intel_de_read(display, PCH_DREF_CONTROL) & DREF_SSC1_ENABLE; @@ -6546,7 +6543,6 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, { struct intel_display *display = to_intel_display(new_crtc_state); struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); /* * Update pipe size and adjust fitter if needed: the reason for this is @@ -6562,7 +6558,7 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, if (DISPLAY_VER(display) >= 9) { if (new_crtc_state->pch_pfit.enabled) skl_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { if (new_crtc_state->pch_pfit.enabled) ilk_pfit_enable(new_crtc_state); else if (old_crtc_state->pch_pfit.enabled) @@ -7638,15 +7634,13 @@ static bool 
ilk_has_edp_a(struct intel_display *display) static bool intel_ddi_crt_present(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 9) return false; if (display->platform.haswell_ult || display->platform.broadwell_ult) return false; - if (HAS_PCH_LPT_H(dev_priv) && + if (HAS_PCH_LPT_H(display) && intel_de_read(display, SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED) return false; @@ -7668,7 +7662,6 @@ bool assert_port_valid(struct intel_display *display, enum port port) void intel_setup_outputs(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; bool dpd_is_edp = false; @@ -7685,7 +7678,7 @@ void intel_setup_outputs(struct intel_display *display) if (display->platform.geminilake || display->platform.broxton) vlv_dsi_init(display); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { int found; /* @@ -8053,13 +8046,11 @@ static const struct intel_display_funcs i9xx_display_funcs = { */ void intel_init_display_hooks(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 9) { display->funcs.display = &skl_display_funcs; } else if (HAS_DDI(display)) { display->funcs.display = &ddi_display_funcs; - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { display->funcs.display = &pch_split_display_funcs; } else if (display->platform.cherryview || display->platform.valleyview) { diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 3f7c605d47d3a..957c9a579a18f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -53,10 +53,9 @@ static struct intel_display *node_to_intel_display(struct drm_info_node *node) static int intel_display_caps(struct seq_file *m, void *data) { struct 
intel_display *display = node_to_intel_display(m->private); - struct drm_i915_private *i915 = to_i915(display->drm); struct drm_printer p = drm_seq_file_printer(m); - drm_printf(&p, "PCH type: %d\n", INTEL_PCH_TYPE(i915)); + drm_printf(&p, "PCH type: %d\n", INTEL_PCH_TYPE(display)); intel_display_device_info_print(DISPLAY_INFO(display), DISPLAY_RUNTIME_INFO(display), &p); @@ -85,7 +84,6 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_sr_status(struct seq_file *m, void *unused) { struct intel_display *display = node_to_intel_display(m->private); - struct drm_i915_private *dev_priv = to_i915(display->drm); intel_wakeref_t wakeref; bool sr_enabled = false; @@ -93,7 +91,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) if (DISPLAY_VER(display) >= 9) /* no global SR status; inspect per-plane WM */; - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) sr_enabled = intel_de_read(display, WM1_LP_ILK) & WM_LP_ENABLE; else if (display->platform.i965gm || display->platform.g4x || display->platform.i945g || display->platform.i945gm) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 738ae522c8f4f..61f76487397af 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1711,7 +1711,6 @@ void intel_display_device_remove(struct intel_display *display) static void __intel_display_device_info_runtime_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_display_runtime_info *display_runtime = DISPLAY_RUNTIME_INFO(display); enum pipe pipe; @@ -1775,7 +1774,7 @@ static void __intel_display_device_info_runtime_init(struct intel_display *displ goto display_fused_off; } - if (IS_DISPLAY_VER(display, 7, 8) && HAS_PCH_SPLIT(i915)) { + if (IS_DISPLAY_VER(display, 7, 8) && HAS_PCH_SPLIT(display)) { u32 fuse_strap = 
intel_de_read(display, FUSE_STRAP); u32 sfuse_strap = intel_de_read(display, SFUSE_STRAP); @@ -1790,7 +1789,7 @@ static void __intel_display_device_info_runtime_init(struct intel_display *displ */ if (fuse_strap & ILK_INTERNAL_DISPLAY_DISABLE || sfuse_strap & SFUSE_STRAP_DISPLAY_DISABLED || - (HAS_PCH_CPT(i915) && + (HAS_PCH_CPT(display) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { drm_info(display->drm, "Display fused off, disabling\n"); diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index d28ad70a35380..2e34af6e40f08 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -879,7 +879,7 @@ static void ilk_gtt_fault_irq_handler(struct intel_display *display) void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) { - struct drm_i915_private *dev_priv = to_i915(display->drm); + struct drm_i915_private __maybe_unused *dev_priv = to_i915(display->drm); enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; @@ -916,7 +916,7 @@ void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) if (de_iir & DE_PCH_EVENT) { u32 pch_iir = intel_de_read(display, SDEIIR); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) cpt_irq_handler(display, pch_iir); else ibx_irq_handler(display, pch_iir); @@ -931,7 +931,6 @@ void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; @@ -969,7 +968,7 @@ void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) } /* check event from PCH */ - if (!HAS_PCH_NOP(dev_priv) && (de_iir & DE_PCH_EVENT_IVB)) { + if (!HAS_PCH_NOP(display) && (de_iir & DE_PCH_EVENT_IVB)) { u32 pch_iir = intel_de_read(display, SDEIIR); cpt_irq_handler(display, pch_iir); @@ 
-1311,7 +1310,6 @@ static u32 gen8_de_pipe_flip_done_mask(struct intel_display *display) static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 *pch_iir, u32 *pica_iir) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 pica_ier = 0; *pica_iir = 0; @@ -1325,7 +1323,7 @@ static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 *pch_i * their flags both in the PICA and SDE IIR. */ if (*pch_iir & SDE_PICAINTERRUPT) { - drm_WARN_ON(display->drm, INTEL_PCH_TYPE(i915) < PCH_MTL); + drm_WARN_ON(display->drm, INTEL_PCH_TYPE(display) < PCH_MTL); pica_ier = intel_de_rmw(display, PICAINTERRUPT_IER, ~0, 0); *pica_iir = intel_de_read(display, PICAINTERRUPT_IIR); @@ -1340,7 +1338,6 @@ static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 *pch_i void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 iir; enum pipe pipe; @@ -1465,7 +1462,7 @@ void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) pipe, fault_errors); } - if (HAS_PCH_SPLIT(dev_priv) && !HAS_PCH_NOP(dev_priv) && + if (HAS_PCH_SPLIT(display) && !HAS_PCH_NOP(display) && master_ctl & GEN8_DE_PCH_IRQ) { u32 pica_iir; @@ -1479,9 +1476,9 @@ void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) if (pica_iir) xelpdp_pica_irq_handler(display, pica_iir); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_irq_handler(display, iir); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) + else if (INTEL_PCH_TYPE(display) >= PCH_SPT) spt_irq_handler(display, iir); else cpt_irq_handler(display, iir); @@ -1998,7 +1995,6 @@ void gen8_display_irq_reset(struct intel_display *display) void gen11_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; u32 trans_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C) | BIT(TRANSCODER_D); @@ 
-2043,7 +2039,7 @@ void gen11_display_irq_reset(struct intel_display *display) else intel_display_irq_regs_reset(display, GEN11_DE_HPD_IRQ_REGS); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) intel_display_irq_regs_reset(display, SDE_IRQ_REGS); } @@ -2105,15 +2101,14 @@ void gen8_irq_power_well_pre_disable(struct intel_display *display, */ static void ibx_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 mask; - if (HAS_PCH_NOP(dev_priv)) + if (HAS_PCH_NOP(display)) return; - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) mask = SDE_GMBUS | SDE_AUX_MASK | SDE_POISON; - else if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) + else if (HAS_PCH_CPT(display) || HAS_PCH_LPT(display)) mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT; else mask = SDE_GMBUS_CPT; @@ -2201,8 +2196,6 @@ static void icp_irq_postinstall(struct intel_display *display); void gen8_de_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - u32 de_pipe_masked = gen8_de_pipe_fault_mask(display) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; @@ -2218,9 +2211,9 @@ void gen8_de_irq_postinstall(struct intel_display *display) if (DISPLAY_VER(display) >= 14) mtp_irq_postinstall(display); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_irq_postinstall(display); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) ibx_irq_postinstall(display); if (DISPLAY_VER(display) < 11) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c78315eb44fac..77f26d9856fd0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1365,11 +1365,9 @@ static void hsw_restore_lcpll(struct intel_display *display) */ static void hsw_enable_pc8(struct intel_display 
*display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - drm_dbg_kms(display->drm, "Enabling package C8+\n"); - if (HAS_PCH_LPT_LP(dev_priv)) + if (HAS_PCH_LPT_LP(display)) intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, PCH_LP_PARTITION_LEVEL_DISABLE, 0); @@ -1415,14 +1413,13 @@ static void intel_pch_reset_handshake(struct intel_display *display, static void skl_display_core_init(struct intel_display *display, bool resume) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; gen9_set_dc_state(display, DC_STATE_DISABLE); /* enable PCH reset handshake */ - intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); if (!HAS_DISPLAY(display)) return; @@ -1624,20 +1621,19 @@ static void tgl_bw_buddy_init(struct intel_display *display) static void icl_display_core_init(struct intel_display *display, bool resume) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; gen9_set_dc_state(display, DC_STATE_DISABLE); /* Wa_14011294188:ehl,jsl,tgl,rkl,adl-s */ - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && - INTEL_PCH_TYPE(dev_priv) < PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && + INTEL_PCH_TYPE(display) < PCH_DG1) intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, 0, PCH_DPMGUNIT_CLOCK_GATE_DISABLE); /* 1. Enable PCH reset handshake. 
*/ - intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); if (!HAS_DISPLAY(display)) return; @@ -1908,7 +1904,6 @@ static void intel_power_domains_verify_state(struct intel_display *display); */ void intel_power_domains_init_hw(struct intel_display *display, bool resume) { - struct drm_i915_private *i915 = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; power_domains->initializing = true; @@ -1932,9 +1927,9 @@ void intel_power_domains_init_hw(struct intel_display *display, bool resume) assert_isp_power_gated(display); } else if (display->platform.broadwell || display->platform.haswell) { hsw_assert_cdclk(display); - intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); } else if (display->platform.ivybridge) { - intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); } /* @@ -2229,8 +2224,6 @@ static void intel_power_domains_verify_state(struct intel_display *display) void intel_display_power_suspend_late(struct intel_display *display, bool s2idle) { - struct drm_i915_private *i915 = to_i915(display->drm); - intel_power_domains_suspend(display, s2idle); if (DISPLAY_VER(display) >= 11 || display->platform.geminilake || @@ -2241,14 +2234,12 @@ void intel_display_power_suspend_late(struct intel_display *display, bool s2idle } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ - if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) - intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); + if (INTEL_PCH_TYPE(display) >= PCH_CNP && INTEL_PCH_TYPE(display) < PCH_DG1) + intel_de_rmw(display, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); } void intel_display_power_resume_early(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if 
(DISPLAY_VER(display) >= 11 || display->platform.geminilake || display->platform.broxton) { gen9_sanitize_dc_state(display); @@ -2258,8 +2249,8 @@ void intel_display_power_resume_early(struct intel_display *display) } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ - if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) - intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); + if (INTEL_PCH_TYPE(display) >= PCH_CNP && INTEL_PCH_TYPE(display) < PCH_DG1) + intel_de_rmw(display, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); intel_power_domains_resume(display); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d7a30d0992b7a..660b1a8c39a46 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6344,7 +6344,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *connector) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_display_mode *fixed_mode; struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; bool has_dpcd; @@ -6361,7 +6360,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, */ if (intel_get_lvds_encoder(display)) { drm_WARN_ON(display->drm, - !(HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))); + !(HAS_PCH_IBX(display) || HAS_PCH_CPT(display))); drm_info(display->drm, "LVDS was detected, not registering eDP\n"); diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 3245a15935dba..b6ef2116164cd 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -111,10 +111,9 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct intel_display *display = to_intel_display(intel_dp); - struct 
drm_i915_private *i915 = to_i915(display->drm); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(i915)) { + if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(display)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -785,7 +784,6 @@ void intel_dp_aux_fini(struct intel_dp *intel_dp) void intel_dp_aux_init(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum aux_ch aux_ch = dig_port->aux_ch; @@ -800,7 +798,7 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) } else if (DISPLAY_VER(display) >= 9) { intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; intel_dp->aux_ch_data_reg = skl_aux_data_reg; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (HAS_PCH_SPLIT(display)) { intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; intel_dp->aux_ch_data_reg = ilk_aux_data_reg; } else if (display->platform.valleyview || display->platform.cherryview) { @@ -815,7 +813,7 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; else if (display->platform.broadwell || display->platform.haswell) intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; - else if (HAS_PCH_SPLIT(i915)) + else if (HAS_PCH_SPLIT(display)) intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; else intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 0481b1365b852..a9e9b98d0bf9c 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -374,12 +374,11 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) static int i9xx_pll_refclk(const struct intel_crtc_state *crtc_state) { struct intel_display 
*display = to_intel_display(crtc_state); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); const struct i9xx_dpll_hw_state *hw_state = &crtc_state->dpll_hw_state.i9xx; if ((hw_state->dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) return display->vbt.lvds_ssc_freq; - else if (HAS_PCH_SPLIT(i915)) + else if (HAS_PCH_SPLIT(display)) return 120000; else if (DISPLAY_VER(display) != 2) return 96000; @@ -1235,12 +1234,10 @@ static int mtl_crtc_compute_clock(struct intel_atomic_state *state, static int ilk_fb_cb_factor(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && ((intel_panel_use_ssc(display) && display->vbt.lvds_ssc_freq == 100000) || - (HAS_PCH_IBX(i915) && intel_is_dual_link_lvds(display)))) + (HAS_PCH_IBX(display) && intel_is_dual_link_lvds(display)))) return 25; if (crtc_state->sdvo_tv_clock) @@ -1791,15 +1788,13 @@ int intel_dpll_crtc_get_shared_dpll(struct intel_atomic_state *state, void intel_dpll_init_clock_hook(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 14) display->funcs.dpll = &mtl_dpll_funcs; else if (display->platform.dg2) display->funcs.dpll = &dg2_dpll_funcs; else if (DISPLAY_VER(display) >= 9 || HAS_DDI(display)) display->funcs.dpll = &hsw_dpll_funcs; - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) display->funcs.dpll = &ilk_dpll_funcs; else if (display->platform.cherryview) display->funcs.dpll = &chv_dpll_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 84df41086a892..c2f08b2ee455b 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -609,13 
+609,12 @@ static int ibx_get_dpll(struct intel_atomic_state *state, struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; enum intel_dpll_id id; - if (HAS_PCH_IBX(i915)) { + if (HAS_PCH_IBX(display)) { /* Ironlake PCH has a fixed PLL->PCH pipe mapping. */ id = (enum intel_dpll_id) crtc->pipe; pll = intel_get_shared_dpll_by_id(display, id); @@ -4305,7 +4304,6 @@ static const struct intel_dpll_mgr adlp_pll_mgr = { */ void intel_shared_dpll_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_dpll_mgr *dpll_mgr = NULL; const struct dpll_info *dpll_info; int i; @@ -4335,7 +4333,7 @@ void intel_shared_dpll_init(struct intel_display *display) dpll_mgr = &skl_pll_mgr; else if (HAS_DDI(display)) dpll_mgr = &hsw_pll_mgr; - else if (HAS_PCH_IBX(i915) || HAS_PCH_CPT(i915)) + else if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) dpll_mgr = &pch_pll_mgr; if (!dpll_mgr) diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 40deee0769ae4..db68c20947abc 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -464,7 +464,6 @@ static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_st void intel_fdi_normal_train(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -483,7 +482,7 @@ void intel_fdi_normal_train(struct intel_crtc *crtc) reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_NORMAL_CPT; } else { @@ -607,7 +606,6 
@@ static void gen6_fdi_link_train(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -647,7 +645,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { @@ -698,7 +696,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; } else { @@ -1077,7 +1075,6 @@ void ilk_fdi_pll_disable(struct intel_crtc *crtc) void ilk_fdi_disable(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -1096,7 +1093,7 @@ void ilk_fdi_disable(struct intel_crtc *crtc) udelay(100); /* Ironlake workaround, disable clock pointer after downing FDI */ - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) intel_de_write(display, FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR); @@ -1106,7 +1103,7 @@ void ilk_fdi_disable(struct intel_crtc *crtc) reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 451cd26024f79..7c7cd29b09444 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -346,7 
+346,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct intel_display *display, old = !crtc->pch_fifo_underrun_disabled; crtc->pch_fifo_underrun_disabled = !enable; - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) ibx_set_fifo_underrun_reporting(display, pch_transcoder, enable); @@ -459,7 +459,7 @@ void intel_check_pch_fifo_underruns(struct intel_display *display) if (crtc->pch_fifo_underrun_disabled) continue; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) cpt_check_pch_fifo_underruns(crtc); } diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 2a530bf35ebbf..d55cc77650b79 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -152,23 +152,22 @@ static const struct gmbus_pin gmbus_pins_mtp[] = { static const struct gmbus_pin *get_gmbus_pin(struct intel_display *display, unsigned int pin) { - struct drm_i915_private *i915 = to_i915(display->drm); const struct gmbus_pin *pins; size_t size; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) { + if (INTEL_PCH_TYPE(display) >= PCH_MTL) { pins = gmbus_pins_mtp; size = ARRAY_SIZE(gmbus_pins_mtp); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG2) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG2) { pins = gmbus_pins_dg2; size = ARRAY_SIZE(gmbus_pins_dg2); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG1) { pins = gmbus_pins_dg1; size = ARRAY_SIZE(gmbus_pins_dg1); - } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_ICP) { pins = gmbus_pins_icp; size = ARRAY_SIZE(gmbus_pins_icp); - } else if (HAS_PCH_CNP(i915)) { + } else if (HAS_PCH_CNP(display)) { pins = gmbus_pins_cnp; size = ARRAY_SIZE(gmbus_pins_cnp); } else if (display->platform.geminilake || display->platform.broxton) { @@ -627,14 +626,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, { struct intel_gmbus *bus = to_intel_gmbus(adapter); 
struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); int i = 0, inc, try = 0; int ret = 0; /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, false); - else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) + else if (HAS_PCH_SPT(display) || HAS_PCH_CNP(display)) pch_gmbus_clock_gating(display, false); retry: @@ -747,7 +745,7 @@ out: /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, true); - else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) + else if (HAS_PCH_SPT(display) || HAS_PCH_CNP(display)) pch_gmbus_clock_gating(display, true); return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 120c63dfdd02e..e08c01e5b9d3c 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -979,7 +979,6 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; @@ -991,7 +990,7 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) reg = TVIDEO_DIP_GCP(crtc->pipe); else return false; @@ -1005,7 +1004,6 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; @@ -1017,7 +1015,7 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) reg = TVIDEO_DIP_GCP(crtc->pipe); else return; @@ -2807,7 +2805,7 @@ static u8 mcc_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); WARN_ON(encoder->port == PORT_C); @@ -2818,7 +2816,7 @@ static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) * combo outputs. With CMP, the traditional DDI A-D pins are used for * all outputs. */ - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && phy >= PHY_C) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && phy >= PHY_C) return GMBUS_PIN_9_TC1_ICP + phy - PHY_C; return GMBUS_PIN_1_BXT + phy; @@ -2827,7 +2825,6 @@ static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 gen9bc_tgp_encoder_to_ddc_pin(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_encoder_to_phy(encoder); drm_WARN_ON(display->drm, encoder->port == PORT_A); @@ -2838,7 +2835,7 @@ static u8 gen9bc_tgp_encoder_to_ddc_pin(struct intel_encoder *encoder) * combo outputs. With CMP, the traditional DDI A-D pins are used for * all outputs. 
*/ - if (INTEL_PCH_TYPE(i915) >= PCH_TGP && phy >= PHY_C) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && phy >= PHY_C) return GMBUS_PIN_9_TC1_ICP + phy - PHY_C; return GMBUS_PIN_1_BXT + phy; @@ -2891,23 +2888,22 @@ static u8 g4x_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 intel_hdmi_default_ddc_pin(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u8 ddc_pin; if (display->platform.alderlake_s) ddc_pin = adls_encoder_to_ddc_pin(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) ddc_pin = dg1_encoder_to_ddc_pin(encoder); else if (display->platform.rocketlake) ddc_pin = rkl_encoder_to_ddc_pin(encoder); - else if (DISPLAY_VER(display) == 9 && HAS_PCH_TGP(dev_priv)) + else if (DISPLAY_VER(display) == 9 && HAS_PCH_TGP(display)) ddc_pin = gen9bc_tgp_encoder_to_ddc_pin(encoder); else if ((display->platform.jasperlake || display->platform.elkhartlake) && - HAS_PCH_TGP(dev_priv)) + HAS_PCH_TGP(display)) ddc_pin = mcc_encoder_to_ddc_pin(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) ddc_pin = icl_encoder_to_ddc_pin(encoder); - else if (HAS_PCH_CNP(dev_priv)) + else if (HAS_PCH_CNP(display)) ddc_pin = cnp_encoder_to_ddc_pin(encoder); else if (display->platform.geminilake || display->platform.broxton) ddc_pin = bxt_encoder_to_ddc_pin(encoder); @@ -2985,8 +2981,6 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) void intel_infoframe_init(struct intel_digital_port *dig_port) { struct intel_display *display = to_intel_display(dig_port); - struct drm_i915_private *dev_priv = - to_i915(dig_port->base.base.dev); if (display->platform.valleyview || display->platform.cherryview) { dig_port->write_infoframe = vlv_write_infoframe; @@ -3010,7 +3004,7 @@ void intel_infoframe_init(struct intel_digital_port *dig_port) dig_port->set_infoframes = 
hsw_set_infoframes; dig_port->infoframes_enabled = hsw_infoframes_enabled; } - } else if (HAS_PCH_IBX(dev_priv)) { + } else if (HAS_PCH_IBX(display)) { dig_port->write_infoframe = ibx_write_infoframe; dig_port->read_infoframe = ibx_read_infoframe; dig_port->set_infoframes = ibx_set_infoframes; diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index 2463e61e7802b..c841399e5c889 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -133,7 +133,6 @@ static const u32 hpd_mtp[HPD_NUM_PINS] = { static void intel_hpd_init_pins(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hpd = &display->hotplug; if (HAS_GMCH(display)) { @@ -160,24 +159,24 @@ static void intel_hpd_init_pins(struct intel_display *display) else hpd->hpd = hpd_ilk; - if ((INTEL_PCH_TYPE(dev_priv) < PCH_DG1) && - (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv))) + if ((INTEL_PCH_TYPE(display) < PCH_DG1) && + (!HAS_PCH_SPLIT(display) || HAS_PCH_NOP(display))) return; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) hpd->pch_hpd = hpd_mtp; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) hpd->pch_hpd = hpd_sde_dg1; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) hpd->pch_hpd = hpd_icp; - else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) + else if (HAS_PCH_CNP(display) || HAS_PCH_SPT(display)) hpd->pch_hpd = hpd_spt; - else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_CPT(dev_priv)) + else if (HAS_PCH_LPT(display) || HAS_PCH_CPT(display)) hpd->pch_hpd = hpd_cpt; - else if (HAS_PCH_IBX(dev_priv)) + else if (HAS_PCH_IBX(display)) hpd->pch_hpd = hpd_ibx; else - MISSING_CASE(INTEL_PCH_TYPE(dev_priv)); + MISSING_CASE(INTEL_PCH_TYPE(display)); } /* For display hotplug interrupt */ @@ -711,7 
+710,7 @@ static u32 ibx_hotplug_mask(enum hpd_pin hpd_pin) static u32 ibx_hotplug_enables(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); switch (encoder->hpd_pin) { case HPD_PORT_A: @@ -719,7 +718,7 @@ static u32 ibx_hotplug_enables(struct intel_encoder *encoder) * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. */ - return HAS_PCH_LPT_LP(i915) ? + return HAS_PCH_LPT_LP(display) ? PORTA_HOTPLUG_ENABLE : 0; case HPD_PORT_B: return PORTB_HOTPLUG_ENABLE | @@ -940,18 +939,17 @@ static void gen11_tbt_hpd_enable_detection(struct intel_encoder *encoder) static void gen11_hpd_enable_detection(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); gen11_tc_hpd_enable_detection(encoder); gen11_tbt_hpd_enable_detection(encoder); - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_hpd_enable_detection(encoder); } static void gen11_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.hpd); @@ -964,7 +962,7 @@ static void gen11_hpd_irq_setup(struct intel_display *display) gen11_tc_hpd_detection_setup(display); gen11_tbt_hpd_detection_setup(display); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_hpd_irq_setup(display); } @@ -1138,7 +1136,6 @@ static void xelpdp_hpd_enable_detection(struct intel_encoder *encoder) static void xelpdp_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.hpd); @@ -1150,9 +1147,9 @@ static void xelpdp_hpd_irq_setup(struct 
intel_display *display) xelpdp_pica_hpd_detection_setup(display); - if (INTEL_PCH_TYPE(i915) >= PCH_LNL) + if (INTEL_PCH_TYPE(display) >= PCH_LNL) xe2lpd_sde_hpd_irq_setup(display); - else if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + else if (INTEL_PCH_TYPE(display) >= PCH_MTL) mtp_hpd_irq_setup(display); } @@ -1194,10 +1191,8 @@ static u32 spt_hotplug2_enables(struct intel_encoder *encoder) static void spt_hpd_detection_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* Display WA #1179 WaHardHangonHotPlug: cnp */ - if (HAS_PCH_CNP(dev_priv)) { + if (HAS_PCH_CNP(display)) { intel_de_rmw(display, SOUTH_CHICKEN1, CHASSIS_CLK_REQ_DURATION_MASK, CHASSIS_CLK_REQ_DURATION(0xf)); } @@ -1215,10 +1210,9 @@ static void spt_hpd_detection_setup(struct intel_display *display) static void spt_hpd_enable_detection(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); /* Display WA #1179 WaHardHangonHotPlug: cnp */ - if (HAS_PCH_CNP(i915)) { + if (HAS_PCH_CNP(display)) { intel_de_rmw(display, SOUTH_CHICKEN1, CHASSIS_CLK_REQ_DURATION_MASK, CHASSIS_CLK_REQ_DURATION(0xf)); @@ -1235,10 +1229,9 @@ static void spt_hpd_enable_detection(struct intel_encoder *encoder) static void spt_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + if (INTEL_PCH_TYPE(display) >= PCH_CNP) intel_de_write(display, SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.pch_hpd); @@ -1474,8 +1467,6 @@ void intel_hpd_irq_setup(struct intel_display *display) void intel_hotplug_irq_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - intel_hpd_init_pins(display); intel_hpd_init_early(display); @@ -1484,9 +1475,9 @@ void 
intel_hotplug_irq_init(struct intel_display *display) if (HAS_HOTPLUG(display)) display->funcs.hotplug = &i915_hpd_funcs; } else { - if (HAS_PCH_DG2(i915)) + if (HAS_PCH_DG2(display)) display->funcs.hotplug = &icp_hpd_funcs; - else if (HAS_PCH_DG1(i915)) + else if (HAS_PCH_DG1(display)) display->funcs.hotplug = &dg1_hpd_funcs; else if (DISPLAY_VER(display) >= 14) display->funcs.hotplug = &xelpdp_hpd_funcs; @@ -1494,9 +1485,9 @@ void intel_hotplug_irq_init(struct intel_display *display) display->funcs.hotplug = &gen11_hpd_funcs; else if (display->platform.geminilake || display->platform.broxton) display->funcs.hotplug = &bxt_hpd_funcs; - else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) display->funcs.hotplug = &icp_hpd_funcs; - else if (INTEL_PCH_TYPE(i915) >= PCH_SPT) + else if (INTEL_PCH_TYPE(display) >= PCH_SPT) display->funcs.hotplug = &spt_hpd_funcs; else display->funcs.hotplug = &ilk_hpd_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 89d26913e2539..495f81cf5667c 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -87,13 +87,12 @@ static struct intel_lvds_encoder *to_lvds_encoder(struct intel_encoder *encoder) bool intel_lvds_port_enabled(struct intel_display *display, i915_reg_t lvds_reg, enum pipe *pipe) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 val; val = intel_de_read(display, lvds_reg); /* asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(i915)) + if (HAS_PCH_CPT(display)) *pipe = REG_FIELD_GET(LVDS_PIPE_SEL_MASK_CPT, val); else *pipe = REG_FIELD_GET(LVDS_PIPE_SEL_MASK, val); @@ -243,13 +242,12 @@ static void intel_pre_enable_lvds(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(state); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); 
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; enum pipe pipe = crtc->pipe; u32 temp; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { assert_fdi_rx_pll_disabled(display, pipe); assert_shared_dpll_disabled(display, crtc_state->shared_dpll); } else { @@ -261,7 +259,7 @@ static void intel_pre_enable_lvds(struct intel_atomic_state *state, temp = lvds_encoder->init_lvds_val; temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; - if (HAS_PCH_CPT(i915)) { + if (HAS_PCH_CPT(display)) { temp &= ~LVDS_PIPE_SEL_MASK_CPT; temp |= LVDS_PIPE_SEL_CPT(pipe); } else { @@ -421,7 +419,6 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder); struct intel_connector *connector = lvds_encoder->attached_connector; struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -435,7 +432,7 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, return -EINVAL; } - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; @@ -798,7 +795,6 @@ bool intel_is_dual_link_lvds(struct intel_display *display) static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) { struct intel_display *display = to_intel_display(&lvds_encoder->base); - struct drm_i915_private *i915 = to_i915(lvds_encoder->base.base.dev); struct intel_connector *connector = lvds_encoder->attached_connector; const struct drm_display_mode *fixed_mode = intel_panel_preferred_fixed_mode(connector); @@ -822,7 +818,7 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) * register is uninitialized. 
*/ val = intel_de_read(display, lvds_encoder->reg); - if (HAS_PCH_CPT(i915)) + if (HAS_PCH_CPT(display)) val &= ~(LVDS_DETECTED | LVDS_PIPE_SEL_MASK_CPT); else val &= ~(LVDS_DETECTED | LVDS_PIPE_SEL_MASK); @@ -846,7 +842,6 @@ static void intel_lvds_add_properties(struct drm_connector *connector) */ void intel_lvds_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_lvds_encoder *lvds_encoder; struct intel_connector *connector; const struct drm_edid *drm_edid; @@ -868,14 +863,14 @@ void intel_lvds_init(struct intel_display *display) return; } - if (HAS_PCH_SPLIT(i915)) + if (HAS_PCH_SPLIT(display)) lvds_reg = PCH_LVDS; else lvds_reg = LVDS; lvds = intel_de_read(display, lvds_reg); - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { if ((lvds & LVDS_DETECTED) == 0) return; } @@ -915,7 +910,7 @@ void intel_lvds_init(struct intel_display *display) encoder->enable = intel_enable_lvds; encoder->pre_enable = intel_pre_enable_lvds; encoder->compute_config = intel_lvds_compute_config; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { encoder->disable = pch_disable_lvds; encoder->post_disable = pch_post_disable_lvds; } else { diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index b909ed18a5b2b..de77d6487d797 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -23,17 +23,15 @@ bool intel_has_pch_trancoder(struct intel_display *display, enum pipe pch_transcoder) { - struct drm_i915_private *i915 = to_i915(display->drm); - - return HAS_PCH_IBX(i915) || HAS_PCH_CPT(i915) || - (HAS_PCH_LPT_H(i915) && pch_transcoder == PIPE_A); + return HAS_PCH_IBX(display) || HAS_PCH_CPT(display) || + (HAS_PCH_LPT_H(display) && pch_transcoder == PIPE_A); } enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct 
intel_display *display = to_intel_display(crtc); - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) return PIPE_A; else return crtc->pipe; @@ -43,7 +41,6 @@ static void assert_pch_dp_disabled(struct intel_display *display, enum pipe pipe, enum port port, i915_reg_t dp_reg) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe port_pipe; bool state; @@ -54,7 +51,7 @@ static void assert_pch_dp_disabled(struct intel_display *display, port_name(port), pipe_name(pipe)); INTEL_DISPLAY_STATE_WARN(display, - HAS_PCH_IBX(dev_priv) && !state && port_pipe == PIPE_B, + HAS_PCH_IBX(display) && !state && port_pipe == PIPE_B, "IBX PCH DP %c still using transcoder B\n", port_name(port)); } @@ -63,7 +60,6 @@ static void assert_pch_hdmi_disabled(struct intel_display *display, enum pipe pipe, enum port port, i915_reg_t hdmi_reg) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe port_pipe; bool state; @@ -74,7 +70,7 @@ static void assert_pch_hdmi_disabled(struct intel_display *display, port_name(port), pipe_name(pipe)); INTEL_DISPLAY_STATE_WARN(display, - HAS_PCH_IBX(dev_priv) && !state && port_pipe == PIPE_B, + HAS_PCH_IBX(display) && !state && port_pipe == PIPE_B, "IBX PCH HDMI %c still using transcoder B\n", port_name(port)); } @@ -249,7 +245,6 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 val, pipeconf_val; @@ -261,7 +256,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) assert_fdi_tx_enabled(display, pipe); assert_fdi_rx_enabled(display, pipe); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { reg = TRANS_CHICKEN2(pipe); val = intel_de_read(display, reg); /* @@ -279,7 +274,7 @@ static void 
ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) val = intel_de_read(display, reg); pipeconf_val = intel_de_read(display, TRANSCONF(display, pipe)); - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { /* Configure frame start delay to match the CPU */ val &= ~TRANS_FRAME_START_DELAY_MASK; val |= TRANS_FRAME_START_DELAY(crtc_state->framestart_delay - 1); @@ -298,7 +293,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) val &= ~TRANS_INTERLACE_MASK; if ((pipeconf_val & TRANSCONF_INTERLACE_MASK_ILK) == TRANSCONF_INTERLACE_IF_ID_ILK) { - if (HAS_PCH_IBX(dev_priv) && + if (HAS_PCH_IBX(display) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_SDVO)) val |= TRANS_INTERLACE_LEGACY_VSYNC_IBX; else @@ -316,7 +311,6 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) static void ilk_disable_pch_transcoder(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; @@ -334,7 +328,7 @@ static void ilk_disable_pch_transcoder(struct intel_crtc *crtc) drm_err(display->drm, "failed to disable transcoder %c\n", pipe_name(pipe)); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) /* Workaround: Clear the timing override chicken bit again. */ intel_de_rmw(display, TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE, 0); @@ -366,7 +360,6 @@ void ilk_pch_enable(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; @@ -381,7 +374,7 @@ void ilk_pch_enable(struct intel_atomic_state *state, * We need to program the right clock selection * before writing the pixel multiplier into the DPLL. 
*/ - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { u32 sel; temp = intel_de_read(display, PCH_DPLL_SEL); @@ -417,7 +410,7 @@ void ilk_pch_enable(struct intel_atomic_state *state, intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ - if (HAS_PCH_CPT(dev_priv) && + if (HAS_PCH_CPT(display) && intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -459,14 +452,13 @@ void ilk_pch_post_disable(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; ilk_disable_pch_transcoder(crtc); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { /* disable TRANS_DP_CTL */ intel_de_rmw(display, TRANS_DP_CTL(pipe), TRANS_DP_OUTPUT_ENABLE | TRANS_DP_PORT_SEL_MASK, @@ -503,7 +495,6 @@ void ilk_pch_get_config(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum pipe pipe = crtc->pipe; enum intel_dpll_id pll_id; @@ -522,7 +513,7 @@ void ilk_pch_get_config(struct intel_crtc_state *crtc_state) intel_cpu_transcoder_get_m1_n1(crtc, crtc_state->cpu_transcoder, &crtc_state->fdi_m_n); - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { /* * The pipe->pch transcoder and pch transcoder->pll * mapping is fixed. 
@@ -646,8 +637,6 @@ void lpt_pch_get_config(struct intel_crtc_state *crtc_state) void intel_pch_sanitize(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_IBX(i915)) + if (HAS_PCH_IBX(display)) ibx_sanitize_pch_ports(display); } diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index 1307a478861ae..693b90e3dfc3c 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -281,7 +281,7 @@ static void lpt_enable_clkout_dp(struct intel_display *display, if (drm_WARN(display->drm, with_fdi && !with_spread, "FDI requires downspread\n")) with_spread = true; - if (drm_WARN(display->drm, HAS_PCH_LPT_LP(dev_priv) && + if (drm_WARN(display->drm, HAS_PCH_LPT_LP(display) && with_fdi, "LP PCH doesn't have FDI\n")) with_fdi = false; @@ -303,7 +303,7 @@ static void lpt_enable_clkout_dp(struct intel_display *display, lpt_fdi_program_mphy(display); } - reg = HAS_PCH_LPT_LP(dev_priv) ? SBI_GEN0 : SBI_DBUFF0; + reg = HAS_PCH_LPT_LP(display) ? SBI_GEN0 : SBI_DBUFF0; tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK); tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); @@ -319,7 +319,7 @@ void lpt_disable_clkout_dp(struct intel_display *display) intel_sbi_lock(dev_priv); - reg = HAS_PCH_LPT_LP(dev_priv) ? SBI_GEN0 : SBI_DBUFF0; + reg = HAS_PCH_LPT_LP(display) ? 
SBI_GEN0 : SBI_DBUFF0; tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK); tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); @@ -498,7 +498,6 @@ static void lpt_init_pch_refclk(struct intel_display *display) static void ilk_init_pch_refclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; struct intel_shared_dpll *pll; int i; @@ -527,7 +526,7 @@ static void ilk_init_pch_refclk(struct intel_display *display) } } - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { has_ck505 = display->vbt.display_clock_mode; can_ssc = has_ck505; } else { @@ -678,10 +677,8 @@ static void ilk_init_pch_refclk(struct intel_display *display) */ void intel_init_pch_refclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) ilk_init_pch_refclk(display); - else if (HAS_PCH_LPT(dev_priv)) + else if (HAS_PCH_LPT(display)) lpt_init_pch_refclk(display); } diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 4d4e2b9f5f2dc..05e1e5c7e8b7c 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -350,21 +350,19 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) static int intel_num_pps(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.valleyview || display->platform.cherryview) return 2; if (display->platform.geminilake || display->platform.broxton) return 2; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) return 2; - if (INTEL_PCH_TYPE(i915) >= PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_DG1) return 1; - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) return 2; return 1; @@ -373,11 +371,10 @@ static 
int intel_num_pps(struct intel_display *display) static bool intel_pps_is_valid(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); if (intel_dp->pps.pps_idx == 1 && - INTEL_PCH_TYPE(i915) >= PCH_ICP && - INTEL_PCH_TYPE(i915) <= PCH_ADP) + INTEL_PCH_TYPE(display) >= PCH_ICP && + INTEL_PCH_TYPE(display) <= PCH_ADP) return intel_de_read(display, SOUTH_CHICKEN1) & ICP_SECOND_PPS_IO_SELECT; return true; @@ -499,7 +496,6 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, struct pps_registers *regs) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); int pps_idx; memset(regs, 0, sizeof(*regs)); @@ -518,7 +514,7 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, /* Cycle delay moved from PP_DIVISOR to PP_CONTROL */ if (display->platform.geminilake || display->platform.broxton || - INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + INTEL_PCH_TYPE(display) >= PCH_CNP) regs->pp_div = INVALID_MMIO_REG; else regs->pp_div = PP_DIVISOR(display, pps_idx); @@ -1591,7 +1587,6 @@ static void pps_init_delays(struct intel_dp *intel_dp) static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 pp_on, pp_off, port_sel = 0; int div = DISPLAY_RUNTIME_INFO(display)->rawclk_freq / 1000; struct pps_registers regs; @@ -1638,7 +1633,7 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd * power sequencer any more. 
*/ if (display->platform.valleyview || display->platform.cherryview) { port_sel = PANEL_PORT_SELECT_VLV(port); - } else if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + } else if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) { switch (port) { case PORT_A: port_sel = PANEL_PORT_SELECT_DPA; @@ -1791,9 +1786,7 @@ void intel_pps_unlock_regs_wa(struct intel_display *display) void intel_pps_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_SPLIT(i915) || display->platform.geminilake || display->platform.broxton) + if (HAS_PCH_SPLIT(display) || display->platform.geminilake || display->platform.broxton) display->pps.mmio_base = PCH_PPS_BASE; else if (display->platform.valleyview || display->platform.cherryview) display->pps.mmio_base = VLV_PPS_BASE; @@ -1836,7 +1829,6 @@ void intel_pps_connector_debugfs_add(struct intel_connector *connector) void assert_pps_unlocked(struct intel_display *display, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); i915_reg_t pp_reg; u32 val; enum pipe panel_pipe = INVALID_PIPE; @@ -1845,7 +1837,7 @@ void assert_pps_unlocked(struct intel_display *display, enum pipe pipe) if (drm_WARN_ON(display->drm, HAS_DDI(display))) return; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { u32 port_sel; pp_reg = PP_CONTROL(display, 0); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 757b9ce7e3b1c..c8e5ad4c4e79a 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -214,18 +214,17 @@ intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val) { struct intel_display *display = to_intel_display(&intel_sdvo->base); - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 bval = val, cval = val; int i; - if (HAS_PCH_SPLIT(dev_priv)) { + if 
(HAS_PCH_SPLIT(display)) { intel_de_write(display, intel_sdvo->sdvo_reg, val); intel_de_posting_read(display, intel_sdvo->sdvo_reg); /* * HW workaround, need to write this twice for issue * that may result in first write getting masked. */ - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { intel_de_write(display, intel_sdvo->sdvo_reg, val); intel_de_posting_read(display, intel_sdvo->sdvo_reg); } @@ -1360,14 +1359,13 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(conn_state->connector); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_display_mode *mode = &pipe_config->hw.mode; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { pipe_config->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(pipe_config)) return -EINVAL; @@ -1527,7 +1525,6 @@ static void intel_sdvo_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(intel_encoder); - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; const struct intel_sdvo_connector_state *sdvo_state = @@ -1634,7 +1631,7 @@ static void intel_sdvo_pre_enable(struct intel_atomic_state *state, sdvox |= (9 << 19) | SDVO_BORDER_ENABLE; } - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) sdvox |= SDVO_PIPE_SEL_CPT(crtc->pipe); else sdvox |= SDVO_PIPE_SEL(crtc->pipe); @@ -1670,13 +1667,12 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector) bool intel_sdvo_port_enabled(struct intel_display 
*display, i915_reg_t sdvo_reg, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 val; val = intel_de_read(display, sdvo_reg); /* asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) *pipe = (val & SDVO_PIPE_SEL_MASK_CPT) >> SDVO_PIPE_SEL_SHIFT_CPT; else if (display->platform.cherryview) *pipe = (val & SDVO_PIPE_SEL_MASK_CHV) >> SDVO_PIPE_SEL_SHIFT_CHV; @@ -1841,7 +1837,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; @@ -1861,7 +1856,7 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, * to transcoder A after disabling it to allow the * matching DP port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. 
@@ -3367,9 +3362,7 @@ intel_sdvo_init_ddc_proxy(struct intel_sdvo_ddc *ddc, static bool is_sdvo_port_valid(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) return port == PORT_B; else return port == PORT_B || port == PORT_C; @@ -3384,7 +3377,6 @@ static bool assert_sdvo_port_valid(struct intel_display *display, enum port port bool intel_sdvo_init(struct intel_display *display, i915_reg_t sdvo_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *intel_encoder; struct intel_sdvo *intel_sdvo; int i; @@ -3427,7 +3419,7 @@ bool intel_sdvo_init(struct intel_display *display, } intel_encoder->compute_config = intel_sdvo_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { intel_encoder->disable = pch_disable_sdvo; intel_encoder->post_disable = pch_post_disable_sdvo; } else { -- GitLab From 2958620abcb2a8f61f007cc588ababac555a062e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 17 Apr 2025 12:10:37 +0300 Subject: [PATCH 403/899] drm/i915/display: drop lots of unnecessary #include i915_drv.h With the PCH macros switched to use struct intel_display, we have a number of files that no longer need struct drm_i915_private or anything else from i915_drv.h. Remove the #include, and add the missing includes that were previously implicit. 
v2: Drop even more of the includes Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/5dc9e6a98461c344febac4c645875d8688eba906.1744880985.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/g4x_dp.c | 4 +++- drivers/gpu/drm/i915/display/g4x_hdmi.c | 3 ++- drivers/gpu/drm/i915/display/intel_audio.c | 2 +- drivers/gpu/drm/i915/display/intel_backlight.c | 5 ++++- drivers/gpu/drm/i915/display/intel_cmtg.c | 1 - drivers/gpu/drm/i915/display/intel_crt.c | 2 +- drivers/gpu/drm/i915/display/intel_cursor.c | 3 ++- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 4 +++- drivers/gpu/drm/i915/display/intel_ddi.c | 3 ++- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 3 ++- drivers/gpu/drm/i915/display/intel_display_device.c | 8 +++++--- drivers/gpu/drm/i915/display/intel_dp_aux.c | 4 +++- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 +++- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 5 ++++- drivers/gpu/drm/i915/display/intel_dsb.c | 3 ++- drivers/gpu/drm/i915/display/intel_fdi.c | 5 +++-- drivers/gpu/drm/i915/display/intel_global_state.c | 5 ++++- drivers/gpu/drm/i915/display/intel_lvds.c | 2 +- drivers/gpu/drm/i915/display/intel_pch_display.c | 3 ++- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- drivers/gpu/drm/i915/display/intel_sprite.c | 3 ++- drivers/gpu/drm/i915/display/intel_vga.c | 5 ++++- drivers/gpu/drm/i915/display/skl_scaler.c | 4 +++- 23 files changed, 57 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 9a8c42d6aea90..38c7eea2414f4 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -7,9 +7,11 @@ #include +#include + #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_audio.h" #include "intel_backlight.h" #include "intel_connector.h" diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c 
index 4e8b13e9ddd8f..1d252432d729a 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -5,8 +5,9 @@ * HDMI support for G4x,ILK,SNB,IVB,VLV,CHV (HSW+ handled by the DDI code). */ +#include + #include "g4x_hdmi.h" -#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_audio.h" diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ef07b54882619..40d8bbd8107d6 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_audio_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 5470fee7a707e..5827da5860032 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -10,8 +10,11 @@ #include -#include "i915_drv.h" +#include +#include + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_backlight.h" #include "intel_backlight_regs.h" #include "intel_connector.h" diff --git a/drivers/gpu/drm/i915/display/intel_cmtg.c b/drivers/gpu/drm/i915/display/intel_cmtg.c index 07d7f4e8f60f1..82606ebae1de9 100644 --- a/drivers/gpu/drm/i915/display/intel_cmtg.c +++ b/drivers/gpu/drm/i915/display/intel_cmtg.c @@ -9,7 +9,6 @@ #include #include -#include "i915_drv.h" #include "i915_reg.h" #include "intel_crtc.h" #include "intel_cmtg.h" diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 415e594053180..38b50a779b6bb 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -31,9 +31,9 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" #include "intel_connector.h" diff 
--git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 3276a5b4a9b00..2fec5ba58373a 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -9,10 +9,11 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_cursor.h" diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 22595766eac53..b09f724c3046b 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -6,8 +6,10 @@ #include #include -#include "i915_drv.h" +#include + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_cx0_phy_regs.h" #include "intel_ddi.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 99142b8cf5690..e90fa7984e28b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -30,10 +30,11 @@ #include #include +#include #include -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "icl_dsi.h" #include "intel_audio.h" #include "intel_audio_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 957c9a579a18f..8d0a1779dd193 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -7,11 +7,12 @@ #include #include +#include #include +#include #include #include "hsw_ips.h" -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" #include "i9xx_wm_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 61f76487397af..90d7145986643 100644 --- 
a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -3,11 +3,13 @@ * Copyright © 2023 Intel Corporation */ -#include -#include #include -#include "i915_drv.h" +#include +#include +#include +#include + #include "i915_reg.h" #include "intel_cx0_phy_regs.h" #include "intel_de.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index b6ef2116164cd..bf8e8e0cc19c9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -3,8 +3,10 @@ * Copyright © 2020-2021 Intel Corporation */ -#include "i915_drv.h" +#include + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4c15dcb103aa2..d19ef1fef452b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -27,10 +27,11 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" @@ -51,6 +52,7 @@ #include "intel_link_bw.h" #include "intel_pfit.h" #include "intel_psr.h" +#include "intel_step.h" #include "intel_vdsc.h" #include "intel_vrr.h" #include "skl_scaler.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index c2f08b2ee455b..9da051a3f4550 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -24,9 +24,11 @@ #include #include +#include + #include "bxt_dpio_phy_regs.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_de.h" #include "intel_display_types.h" @@ -38,6 +40,7 @@ #include "intel_hti.h" #include 
"intel_mg_phy_regs.h" #include "intel_pch_refclk.h" +#include "intel_step.h" #include "intel_tc.h" /** diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 72fe390c5af22..481488d1fe677 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -4,11 +4,12 @@ * */ +#include #include -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_rpm.h" diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index db68c20947abc..169bbe154b5c5 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -6,15 +6,16 @@ #include #include +#include -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_crtc.h" #include "intel_ddi.h" #include "intel_de.h" -#include "intel_dp.h" #include "intel_display_types.h" +#include "intel_dp.h" #include "intel_fdi.h" #include "intel_fdi_regs.h" #include "intel_link_bw.h" diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 8a49e2bb37faa..000a898c94805 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -3,10 +3,13 @@ * Copyright © 2020 Intel Corporation */ +#include #include -#include "i915_drv.h" +#include + #include "intel_atomic.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_global_state.h" diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 495f81cf5667c..8ce7c630da527 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -37,9 +37,9 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "i915_reg.h" 
#include "intel_atomic.h" #include "intel_backlight.h" diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index de77d6487d797..1743ebf551cbe 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -3,8 +3,9 @@ * Copyright © 2021 Intel Corporation */ +#include + #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_reg.h" #include "intel_crt.h" #include "intel_crt_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index c8e5ad4c4e79a..8a38df2c0283d 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -36,9 +36,9 @@ #include #include #include +#include #include -#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_audio.h" diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 1ad6c8a94b3d9..fd92e6b89b431 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -36,9 +36,10 @@ #include #include #include +#include #include -#include "i915_drv.h" +#include "i915_utils.h" #include "i9xx_plane.h" #include "intel_atomic_plane.h" #include "intel_de.h" diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 684b5d1bc87c1..6426ac6b8c51e 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -4,12 +4,15 @@ */ #include +#include #include +#include +#include #include